ref #8404 add steplist to BgbmInstancesUpdater code
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / bogota / BogotaChecklistTaxonImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2017 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.bogota;
11
12 import java.util.Map;
13 import java.util.UUID;
14
15 import org.apache.log4j.Logger;
16 import org.springframework.stereotype.Component;
17 import org.springframework.transaction.TransactionStatus;
18
19 import eu.etaxonomy.cdm.common.CdmUtils;
20 import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
21 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
22 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
23 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
24 import eu.etaxonomy.cdm.model.common.Language;
25 import eu.etaxonomy.cdm.model.name.IBotanicalName;
26 import eu.etaxonomy.cdm.model.name.Rank;
27 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
28 import eu.etaxonomy.cdm.model.reference.Reference;
29 import eu.etaxonomy.cdm.model.taxon.Classification;
30 import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
31 import eu.etaxonomy.cdm.model.taxon.Synonym;
32 import eu.etaxonomy.cdm.model.taxon.SynonymType;
33 import eu.etaxonomy.cdm.model.taxon.Taxon;
34 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
35 import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
36 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
37
38 /**
39 * @author a.mueller
40 * @since 21.04.2017
41 *
42 */
43 @Component
44 public class BogotaChecklistTaxonImport<CONFIG extends BogotaChecklistImportConfigurator>
45 extends SimpleExcelTaxonImport<CONFIG> {
46
47 private static final long serialVersionUID = -884838817884874228L;
48 private static final Logger logger = Logger.getLogger(BogotaChecklistTaxonImport.class);
49
50 private static final String ID_COL = "#";
51 private static final String AUTHOR = "Autor";
52 private static final String NAME = "Nombre";
53 private static final String GENUS = "GĂ©nero";
54 private static final String FAMILIA = "Familia";
55 private static final String INFRASPECIFIC = "Taxones infraespecĂ­ficos";
56 private static final String SINONIMOS = "Sinonimos";
57
58 private static UUID rootUuid = UUID.fromString("d66eda18-4c11-4472-bfe8-f6cd5ed95c9f");
59 private static UUID plantaeUuid = UUID.fromString("032fc183-eb4f-4f19-a290-28597a849096");
60
61 @SuppressWarnings("unchecked")
62 private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper
63 = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewStandaloneInstance();
64
65 private String lastGenus;
66 private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
67 private BasionymRelationCreator basionymCreator = new BasionymRelationCreator();
68
69
70 @Override
71 protected String getWorksheetName(CONFIG config) {
72 return "Resultados Busqueda Avanzada";
73 }
74
75 private boolean isFirst = true;
76 private TransactionStatus tx = null;
77
78 /**
79 * {@inheritDoc}
80 */
81 @Override
82 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
83 if (isFirst){
84 tx = this.startTransaction();
85 isFirst = false;
86 }
87
88 String line = state.getCurrentLine() + ": ";
89 Map<String, String> record = state.getOriginalRecord();
90
91 String noStr = getValue(record, ID_COL);
92
93 //species
94 TaxonNode taxonNode = makeTaxon(state, line, record, noStr);
95
96 if (taxonNode != null){
97 //synonyms
98 makeSynonyms(state, record, line, taxonNode.getTaxon(), noStr);
99
100 //infraspecific
101 makeInfraSpecific(state, record, line, taxonNode, noStr);
102 }else{
103 logger.warn(line + "No taxon node given");
104 }
105 }
106
107
108 @Override
109 protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
110 if (tx != null){
111 this.commitTransaction(tx);
112 tx = null;
113 }
114 }
115
116 /**
117 * @param state
118 * @param record
119 * @param line
120 * @param taxon
121 */
122 private void makeSynonyms(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record, String line,
123 Taxon taxon, String noStr) {
124
125 String synonymsStr = getValue(record, SINONIMOS);
126 if (synonymsStr != null){
127 String[] splits = synonymsStr.split(",");
128 for(String split : splits){
129 split = split.trim();
130 boolean isMisapplied = split.contains("auct.") || split.contains(" sensu ");
131 if (split.endsWith(" None")){
132 split = split.replace(" None", "").trim();
133 }
134 if (isMisapplied){
135 handleSingleMisapplied(state, split, line, taxon, noStr);
136 }else{
137 handleSingleSynonym(state, split, line, taxon, noStr);
138 }
139 }
140 }
141 basionymCreator.invoke(taxon);
142 }
143
144 /**
145 * @param state
146 * @param trim
147 * @param line
148 * @param taxon
149 * @param noStr
150 */
151 private void handleSingleMisapplied(SimpleExcelTaxonImportState<CONFIG> state, String nameStr, String line,
152 Taxon taxon, String noStr) {
153 Rank rank = Rank.SPECIES();
154 String AUCT_NON = "auct. non ";
155 String auctStr = nameStr.contains(AUCT_NON)? AUCT_NON: nameStr.endsWith("auct.")? "auct.": null;
156 boolean auctRequired = false;
157 if (auctStr == null){
158 auctRequired = true;
159 if (nameStr.endsWith("auct.colomb.")){
160 nameStr = nameStr.replace(" auct.colomb.", "");
161 auctStr = "auct.colomb.";
162 }else if (nameStr.endsWith(" [auct.mult.non Sw.]")){
163 nameStr = nameStr.replace(" [auct.mult.non Sw.]", "");
164 auctStr = "[auct.mult.non Sw.]";
165 }else if (nameStr.endsWith(" auct.pr.p.")){
166 nameStr = nameStr.replace(" auct.pr.p.", "");
167 auctStr = "auct.pr.p.";
168 }else if (nameStr.contains(" sensu ")){
169 logger.warn(line + "sensu not yet handled correctly:" + nameStr);
170 auctRequired = false;
171 }else{
172 auctRequired = false;
173 logger.warn(line + "auct. not recognized: " + nameStr);
174 }
175
176 }else{
177 nameStr = nameStr.replace(auctStr, "").trim();
178 }
179 IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
180 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
181 name = deduplicationHelper.getExistingName(state, name);
182 if (name.isProtectedTitleCache()){
183 logger.warn(line + "Misapplied name could not be parsed: " + nameStr);
184 }
185 deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
186
187 Taxon misApp = Taxon.NewInstance(name, null);
188 if (auctRequired){
189 misApp.setAppendedPhrase(auctStr);
190 }
191 misApp.addImportSource(noStr, getNamespace(state.getConfig()),
192 getSourceCitation(state), null);
193 taxon.addMisappliedName(misApp, state.getConfig().getSecReference(), null);
194 }
195
196
197 /**
198 * @param col
199 * @return
200 */
201 private String getNamespace(CONFIG config) {
202 return getWorksheetName(config)+"."+ ID_COL;
203 }
204
205
206 /**
207 * @param state
208 * @param record
209 * @param line
210 * @param taxon
211 * @param noStr
212 */
213 private void handleSingleSynonym(SimpleExcelTaxonImportState<CONFIG> state, String nameStr,
214 String line, Taxon taxon, String noStr) {
215 Rank rank = Rank.SPECIES();
216 IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
217 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
218 name = deduplicationHelper.getExistingName(state, name);
219 if (name.isProtectedTitleCache()){
220 logger.warn(line + "Synonym could not be parsed: " + nameStr);
221 }
222 deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
223
224 Synonym synonym = Synonym.NewInstance(name, getSecReference(state));
225 synonym.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
226 taxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
227 }
228
229
230 /**
231 * @param state
232 * @param line
233 * @param record
234 * @param taxon
235 * @param noStr
236 */
237 private void makeInfraSpecific(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record, String line,
238 TaxonNode speciesNode, String noStr) {
239 String subSpeciesStr = getValue(record, INFRASPECIFIC);
240 if (subSpeciesStr != null){
241 String[] splits = subSpeciesStr.split(",");
242 for(String split : splits){
243 if (split.endsWith(" None")){
244 split = split.replace(" None", "").trim();
245 }
246 Rank rank = Rank.SUBSPECIES();
247 IBotanicalName name = (IBotanicalName)parser.parseFullName(split.trim(), state.getConfig().getNomenclaturalCode(), rank);
248 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
249 name = deduplicationHelper.getExistingName(state, name);
250 if (name.isProtectedTitleCache()){
251 logger.warn(line + "Infraspecific taxon could not be parsed: " + split.trim());
252 }
253 deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
254
255 Taxon subSpecies = Taxon.NewInstance(name, getSecReference(state));
256 subSpecies.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
257 TaxonNode subSpeciesNode = speciesNode.addChildTaxon(subSpecies, getSecReference(state), null);
258 getTaxonNodeService().save(subSpeciesNode);
259 }
260 }
261 }
262
263 /**
264 * @param state
265 * @param line
266 * @param record
267 * @param noStr
268 * @return
269 */
270 private TaxonNode makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record,
271 String noStr) {
272
273 TaxonNode familyTaxon = getFamilyTaxon(record, state);
274 if (familyTaxon == null){
275 logger.warn(line + "Family not created: " + record.get(FAMILIA));
276 }
277
278 String genusStr = getValue(record, GENUS);
279 String nameStr = getValue(record, NAME);
280 String speciesAuthorStr = getValue(record, AUTHOR);
281
282 nameStr = CdmUtils.concat(" ", nameStr, speciesAuthorStr);
283 Rank rank = Rank.SPECIES();
284 IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
285 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
286 name = deduplicationHelper.getExistingName(state, name);
287 if (name.isProtectedTitleCache()){
288 logger.warn(line + "Name could not be parsed: " + nameStr);
289 }
290 deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
291
292 Taxon taxon = Taxon.NewInstance(name, getSecReference(state));
293
294 taxon.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
295
296 String parentStr = genusStr;
297 boolean genusAsBefore = genusStr.equals(lastGenus);
298 TaxonNode parent = getParent(state, parentStr);
299 TaxonNode newNode;
300 if (parent != null){
301 if (genusAsBefore ){
302 //everything as expected
303 newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
304 getTaxonNodeService().save(newNode);
305 }else{
306 logger.warn(line + "Unexpected non-missing parent");
307 newNode = null;
308 }
309 }else{
310 if (genusAsBefore){
311 logger.warn(line + "Unexpected missing genus parent");
312 newNode = null;
313 }else{
314 parent = makeGenusNode(state, record, genusStr);
315 newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
316 getTaxonNodeService().save(newNode);
317 }
318 }
319
320 this.lastGenus = genusStr;
321 return newNode;
322 }
323
324 /**
325 * @param record
326 * @param state
327 * @return
328 */
329 private TaxonNode getFamilyTaxon(Map<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
330 String familyStr = getValue(record, FAMILIA);
331 if (familyStr == null){
332 return null;
333 }
334 familyStr = familyStr.trim();
335
336 Taxon family = state.getHigherTaxon(familyStr);
337 TaxonNode familyNode;
338 if (family != null){
339 familyNode = family.getTaxonNodes().iterator().next();
340 }else{
341 IBotanicalName name = makeFamilyName(state, familyStr);
342 Reference sec = getSecReference(state);
343 family = Taxon.NewInstance(name, sec);
344
345 ITaxonTreeNode classificationNode = getClassification(state);
346 familyNode = classificationNode.addChildTaxon(family, sec, null);
347 state.putHigherTaxon(familyStr, family);
348 getTaxonNodeService().save(familyNode);
349 }
350
351 return familyNode;
352 }
353
354
355 private TaxonNode rootNode;
356 private TaxonNode getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
357 if (rootNode == null){
358 Reference sec = getSecReference(state);
359 String classificationName = state.getConfig().getClassificationName();
360 Language language = Language.DEFAULT();
361 Classification classification = Classification.NewInstance(classificationName, sec, language);
362 classification.setUuid(state.getConfig().getClassificationUuid());
363 classification.getRootNode().setUuid(rootUuid);
364
365 IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
366 plantaeName.setGenusOrUninomial("Plantae");
367 Taxon plantae = Taxon.NewInstance(plantaeName, sec);
368 TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
369 plantaeNode.setUuid(plantaeUuid);
370 getClassificationService().save(classification);
371
372 rootNode = plantaeNode;
373 }
374 return rootNode;
375 }
376
377
378 protected IBotanicalName makeFamilyName(SimpleExcelTaxonImportState<CONFIG> state, String famStr) {
379 IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
380 famStr = decapitalize(famStr);
381 name.setGenusOrUninomial(famStr);
382 name.addSource(makeOriginalSource(state));
383 return name;
384 }
385
386 /**
387 * @param state
388 * @return
389 */
390 @Override
391 protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
392 return IdentifiableSource.NewDataImportInstance(getValue(state.getOriginalRecord(),ID_COL),
393 getNamespace(state.getConfig()), state.getConfig().getSourceReference());
394 }
395
396 /**
397 * @param famStr
398 * @return
399 */
400 private String decapitalize(String famStr) {
401 String result = famStr.substring(0,1) + famStr.substring(1).toLowerCase();
402 return result;
403 }
404
405
406 protected Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state) {
407 return state.getConfig().getSecReference();
408 }
409
410 /**
411 * @param state
412 * @return
413 */
414 protected Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
415 return state.getConfig().getSourceReference();
416 }
417
418 /**
419 * @param state
420 * @param parentStr
421 * @return
422 */
423 private TaxonNode getParent(SimpleExcelTaxonImportState<CONFIG> state, String parentStr) {
424 Taxon taxon = state.getHigherTaxon(parentStr);
425
426 return taxon == null ? null : taxon.getTaxonNodes().iterator().next();
427 }
428
429 /**
430 * @param state
431 * @param record
432 * @param genusStr
433 * @return
434 */
435 private TaxonNode makeGenusNode(SimpleExcelTaxonImportState<CONFIG> state,
436 Map<String, String> record, String genusStr) {
437 IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
438 name.setGenusOrUninomial(genusStr);
439 Taxon genus = Taxon.NewInstance(name, getSecReference(state));
440 TaxonNode family = getFamilyTaxon(record, state);
441 TaxonNode genusNode = family.addChildTaxon(genus, getSecReference(state), null);
442 state.putHigherTaxon(genusStr, genus);
443 genus.addSource(makeOriginalSource(state));
444 getTaxonNodeService().save(genusNode);
445 return genusNode;
446 }
447
448 }