1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2017 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
package eu.etaxonomy.cdm.io.bogota;
|
11
|
|
12
|
import java.util.HashMap;
|
13
|
import java.util.UUID;
|
14
|
|
15
|
import org.apache.log4j.Logger;
|
16
|
import org.springframework.stereotype.Component;
|
17
|
import org.springframework.transaction.TransactionStatus;
|
18
|
|
19
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
20
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
21
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
22
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
23
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
24
|
import eu.etaxonomy.cdm.model.common.Language;
|
25
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
26
|
import eu.etaxonomy.cdm.model.name.Rank;
|
27
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
28
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
29
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
30
|
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
|
31
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
32
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
33
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
34
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
35
|
import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
|
36
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
37
|
|
38
|
/**
|
39
|
* @author a.mueller
|
40
|
* @date 21.04.2017
|
41
|
*
|
42
|
*/
|
43
|
@Component
|
44
|
public class BogotaChecklistTaxonImport<CONFIG extends BogotaChecklistImportConfigurator>
|
45
|
extends SimpleExcelTaxonImport<CONFIG> {
|
46
|
|
47
|
private static final long serialVersionUID = -884838817884874228L;
|
48
|
private static final Logger logger = Logger.getLogger(BogotaChecklistTaxonImport.class);
|
49
|
|
50
|
private static final String ID_COL = "#";
|
51
|
private static final String AUTHOR = "Autor";
|
52
|
private static final String NAME = "Nombre";
|
53
|
private static final String GENUS = "Género";
|
54
|
private static final String FAMILIA = "Familia";
|
55
|
private static final String INFRASPECIFIC = "Taxones infraespecíficos";
|
56
|
private static final String SINONIMOS = "Sinonimos";
|
57
|
|
58
|
private static UUID rootUuid = UUID.fromString("d66eda18-4c11-4472-bfe8-f6cd5ed95c9f");
|
59
|
private static UUID plantaeUuid = UUID.fromString("032fc183-eb4f-4f19-a290-28597a849096");
|
60
|
|
61
|
@SuppressWarnings("unchecked")
|
62
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewStandaloneInstance();
|
63
|
|
64
|
private String lastGenus;
|
65
|
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
66
|
private BasionymRelationCreator basionymCreator = new BasionymRelationCreator();
|
67
|
|
68
|
|
69
|
@Override
|
70
|
protected String getWorksheetName() {
|
71
|
return "Resultados Busqueda Avanzada";
|
72
|
}
|
73
|
|
74
|
private boolean isFirst = true;
|
75
|
private TransactionStatus tx = null;
|
76
|
|
77
|
/**
|
78
|
* {@inheritDoc}
|
79
|
*/
|
80
|
@Override
|
81
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
82
|
if (isFirst){
|
83
|
tx = this.startTransaction();
|
84
|
isFirst = false;
|
85
|
}
|
86
|
|
87
|
String line = state.getCurrentLine() + ": ";
|
88
|
HashMap<String, String> record = state.getOriginalRecord();
|
89
|
|
90
|
String noStr = getValue(record, ID_COL);
|
91
|
|
92
|
//species
|
93
|
TaxonNode taxonNode = makeTaxon(state, line, record, noStr);
|
94
|
|
95
|
if (taxonNode != null){
|
96
|
//synonyms
|
97
|
makeSynonyms(state, record, line, taxonNode.getTaxon(), noStr);
|
98
|
|
99
|
//infraspecific
|
100
|
makeInfraSpecific(state, record, line, taxonNode, noStr);
|
101
|
}else{
|
102
|
logger.warn(line + "No taxon node given");
|
103
|
}
|
104
|
}
|
105
|
|
106
|
|
107
|
@Override
|
108
|
protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
109
|
if (tx != null){
|
110
|
this.commitTransaction(tx);
|
111
|
tx = null;
|
112
|
}
|
113
|
}
|
114
|
|
115
|
/**
|
116
|
* @param state
|
117
|
* @param record
|
118
|
* @param line
|
119
|
* @param taxon
|
120
|
*/
|
121
|
private void makeSynonyms(SimpleExcelTaxonImportState<CONFIG> state, HashMap<String, String> record, String line,
|
122
|
Taxon taxon, String noStr) {
|
123
|
|
124
|
String synonymsStr = getValue(record, SINONIMOS);
|
125
|
if (synonymsStr != null){
|
126
|
String[] splits = synonymsStr.split(",");
|
127
|
for(String split : splits){
|
128
|
split = split.trim();
|
129
|
boolean isMisapplied = split.contains("auct.") || split.contains(" sensu ");
|
130
|
if (split.endsWith(" None")){
|
131
|
split = split.replace(" None", "").trim();
|
132
|
}
|
133
|
if (isMisapplied){
|
134
|
handleSingleMisapplied(state, split, line, taxon, noStr);
|
135
|
}else{
|
136
|
handleSingleSynonym(state, split, line, taxon, noStr);
|
137
|
}
|
138
|
}
|
139
|
}
|
140
|
basionymCreator.invoke(taxon);
|
141
|
}
|
142
|
|
143
|
/**
|
144
|
* @param state
|
145
|
* @param trim
|
146
|
* @param line
|
147
|
* @param taxon
|
148
|
* @param noStr
|
149
|
*/
|
150
|
private void handleSingleMisapplied(SimpleExcelTaxonImportState<CONFIG> state, String nameStr, String line,
|
151
|
Taxon taxon, String noStr) {
|
152
|
Rank rank = Rank.SPECIES();
|
153
|
String AUCT_NON = "auct. non ";
|
154
|
String auctStr = nameStr.contains(AUCT_NON)? AUCT_NON: nameStr.endsWith("auct.")? "auct.": null;
|
155
|
boolean auctRequired = false;
|
156
|
if (auctStr == null){
|
157
|
auctRequired = true;
|
158
|
if (nameStr.endsWith("auct.colomb.")){
|
159
|
nameStr = nameStr.replace(" auct.colomb.", "");
|
160
|
auctStr = "auct.colomb.";
|
161
|
}else if (nameStr.endsWith(" [auct.mult.non Sw.]")){
|
162
|
nameStr = nameStr.replace(" [auct.mult.non Sw.]", "");
|
163
|
auctStr = "[auct.mult.non Sw.]";
|
164
|
}else if (nameStr.endsWith(" auct.pr.p.")){
|
165
|
nameStr = nameStr.replace(" auct.pr.p.", "");
|
166
|
auctStr = "auct.pr.p.";
|
167
|
}else if (nameStr.contains(" sensu ")){
|
168
|
logger.warn(line + "sensu not yet handled correctly:" + nameStr);
|
169
|
auctRequired = false;
|
170
|
}else{
|
171
|
auctRequired = false;
|
172
|
logger.warn(line + "auct. not recognized: " + nameStr);
|
173
|
}
|
174
|
|
175
|
}else{
|
176
|
nameStr = nameStr.replace(auctStr, "").trim();
|
177
|
}
|
178
|
IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
|
179
|
name.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
180
|
name = deduplicationHelper.getExistingName(state, name);
|
181
|
if (name.isProtectedTitleCache()){
|
182
|
logger.warn(line + "Misapplied name could not be parsed: " + nameStr);
|
183
|
}
|
184
|
deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
|
185
|
|
186
|
Taxon misApp = Taxon.NewInstance(name, null);
|
187
|
if (auctRequired){
|
188
|
misApp.setAppendedPhrase(auctStr);
|
189
|
}
|
190
|
misApp.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
191
|
taxon.addMisappliedName(misApp, state.getConfig().getSecReference(), null);
|
192
|
}
|
193
|
|
194
|
|
195
|
/**
|
196
|
* @param col
|
197
|
* @return
|
198
|
*/
|
199
|
private String getNamespace() {
|
200
|
return getWorksheetName()+"."+ ID_COL;
|
201
|
}
|
202
|
|
203
|
|
204
|
/**
|
205
|
* @param state
|
206
|
* @param record
|
207
|
* @param line
|
208
|
* @param taxon
|
209
|
* @param noStr
|
210
|
*/
|
211
|
private void handleSingleSynonym(SimpleExcelTaxonImportState<CONFIG> state, String nameStr,
|
212
|
String line, Taxon taxon, String noStr) {
|
213
|
Rank rank = Rank.SPECIES();
|
214
|
IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
|
215
|
name.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
216
|
name = deduplicationHelper.getExistingName(state, name);
|
217
|
if (name.isProtectedTitleCache()){
|
218
|
logger.warn(line + "Synonym could not be parsed: " + nameStr);
|
219
|
}
|
220
|
deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
|
221
|
|
222
|
Synonym synonym = Synonym.NewInstance(name, getSecReference(state));
|
223
|
synonym.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
224
|
taxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
|
225
|
}
|
226
|
|
227
|
|
228
|
/**
|
229
|
* @param state
|
230
|
* @param line
|
231
|
* @param record
|
232
|
* @param taxon
|
233
|
* @param noStr
|
234
|
*/
|
235
|
private void makeInfraSpecific(SimpleExcelTaxonImportState<CONFIG> state, HashMap<String, String> record, String line,
|
236
|
TaxonNode speciesNode, String noStr) {
|
237
|
String subSpeciesStr = getValue(record, INFRASPECIFIC);
|
238
|
if (subSpeciesStr != null){
|
239
|
String[] splits = subSpeciesStr.split(",");
|
240
|
for(String split : splits){
|
241
|
if (split.endsWith(" None")){
|
242
|
split = split.replace(" None", "").trim();
|
243
|
}
|
244
|
Rank rank = Rank.SUBSPECIES();
|
245
|
IBotanicalName name = (IBotanicalName)parser.parseFullName(split.trim(), state.getConfig().getNomenclaturalCode(), rank);
|
246
|
name.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
247
|
name = deduplicationHelper.getExistingName(state, name);
|
248
|
if (name.isProtectedTitleCache()){
|
249
|
logger.warn(line + "Infraspecific taxon could not be parsed: " + split.trim());
|
250
|
}
|
251
|
deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
|
252
|
|
253
|
Taxon subSpecies = Taxon.NewInstance(name, getSecReference(state));
|
254
|
subSpecies.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
255
|
TaxonNode subSpeciesNode = speciesNode.addChildTaxon(subSpecies, getSecReference(state), null);
|
256
|
getTaxonNodeService().save(subSpeciesNode);
|
257
|
}
|
258
|
}
|
259
|
}
|
260
|
|
261
|
/**
|
262
|
* @param state
|
263
|
* @param line
|
264
|
* @param record
|
265
|
* @param noStr
|
266
|
* @return
|
267
|
*/
|
268
|
private TaxonNode makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, HashMap<String, String> record,
|
269
|
String noStr) {
|
270
|
|
271
|
TaxonNode familyTaxon = getFamilyTaxon(record, state);
|
272
|
if (familyTaxon == null){
|
273
|
logger.warn(line + "Family not created: " + record.get(FAMILIA));
|
274
|
}
|
275
|
|
276
|
String genusStr = getValue(record, GENUS);
|
277
|
String nameStr = getValue(record, NAME);
|
278
|
String speciesAuthorStr = getValue(record, AUTHOR);
|
279
|
|
280
|
nameStr = CdmUtils.concat(" ", nameStr, speciesAuthorStr);
|
281
|
Rank rank = Rank.SPECIES();
|
282
|
IBotanicalName name = (IBotanicalName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
|
283
|
name.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
284
|
name = deduplicationHelper.getExistingName(state, name);
|
285
|
if (name.isProtectedTitleCache()){
|
286
|
logger.warn(line + "Name could not be parsed: " + nameStr);
|
287
|
}
|
288
|
deduplicationHelper.replaceAuthorNamesAndNomRef(state, name);
|
289
|
|
290
|
Taxon taxon = Taxon.NewInstance(name, getSecReference(state));
|
291
|
|
292
|
taxon.addImportSource(noStr, getNamespace(), getSourceCitation(state), null);
|
293
|
|
294
|
String parentStr = genusStr;
|
295
|
boolean genusAsBefore = genusStr.equals(lastGenus);
|
296
|
TaxonNode parent = getParent(state, parentStr);
|
297
|
TaxonNode newNode;
|
298
|
if (parent != null){
|
299
|
if (genusAsBefore ){
|
300
|
//everything as expected
|
301
|
newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
|
302
|
getTaxonNodeService().save(newNode);
|
303
|
}else{
|
304
|
logger.warn(line + "Unexpected non-missing parent");
|
305
|
newNode = null;
|
306
|
}
|
307
|
}else{
|
308
|
if (genusAsBefore){
|
309
|
logger.warn(line + "Unexpected missing genus parent");
|
310
|
newNode = null;
|
311
|
}else{
|
312
|
parent = makeGenusNode(state, record, genusStr);
|
313
|
newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
|
314
|
getTaxonNodeService().save(newNode);
|
315
|
}
|
316
|
}
|
317
|
|
318
|
this.lastGenus = genusStr;
|
319
|
return newNode;
|
320
|
}
|
321
|
|
322
|
/**
|
323
|
* @param record
|
324
|
* @param state
|
325
|
* @return
|
326
|
*/
|
327
|
private TaxonNode getFamilyTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
|
328
|
String familyStr = getValue(record, FAMILIA);
|
329
|
if (familyStr == null){
|
330
|
return null;
|
331
|
}
|
332
|
familyStr = familyStr.trim();
|
333
|
|
334
|
Taxon family = state.getHigherTaxon(familyStr);
|
335
|
TaxonNode familyNode;
|
336
|
if (family != null){
|
337
|
familyNode = family.getTaxonNodes().iterator().next();
|
338
|
}else{
|
339
|
IBotanicalName name = makeFamilyName(state, familyStr);
|
340
|
Reference sec = getSecReference(state);
|
341
|
family = Taxon.NewInstance(name, sec);
|
342
|
|
343
|
ITaxonTreeNode classificationNode = getClassification(state);
|
344
|
familyNode = classificationNode.addChildTaxon(family, sec, null);
|
345
|
state.putHigherTaxon(familyStr, family);
|
346
|
getTaxonNodeService().save(familyNode);
|
347
|
}
|
348
|
|
349
|
return familyNode;
|
350
|
}
|
351
|
|
352
|
|
353
|
private TaxonNode rootNode;
|
354
|
private TaxonNode getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
|
355
|
if (rootNode == null){
|
356
|
Reference sec = getSecReference(state);
|
357
|
String classificationName = state.getConfig().getClassificationName();
|
358
|
Language language = Language.DEFAULT();
|
359
|
Classification classification = Classification.NewInstance(classificationName, sec, language);
|
360
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
361
|
classification.getRootNode().setUuid(rootUuid);
|
362
|
|
363
|
IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
|
364
|
plantaeName.setGenusOrUninomial("Plantae");
|
365
|
Taxon plantae = Taxon.NewInstance(plantaeName, sec);
|
366
|
TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
|
367
|
plantaeNode.setUuid(plantaeUuid);
|
368
|
getClassificationService().save(classification);
|
369
|
|
370
|
rootNode = plantaeNode;
|
371
|
}
|
372
|
return rootNode;
|
373
|
}
|
374
|
|
375
|
|
376
|
protected IBotanicalName makeFamilyName(SimpleExcelTaxonImportState<CONFIG> state, String famStr) {
|
377
|
IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
|
378
|
famStr = decapitalize(famStr);
|
379
|
name.setGenusOrUninomial(famStr);
|
380
|
name.addSource(makeOriginalSource(state));
|
381
|
return name;
|
382
|
}
|
383
|
|
384
|
/**
|
385
|
* @param state
|
386
|
* @return
|
387
|
*/
|
388
|
@Override
|
389
|
protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
|
390
|
return IdentifiableSource.NewDataImportInstance(getValue(state.getOriginalRecord(),ID_COL), getNamespace(), state.getConfig().getSourceReference());
|
391
|
}
|
392
|
|
393
|
/**
|
394
|
* @param famStr
|
395
|
* @return
|
396
|
*/
|
397
|
private String decapitalize(String famStr) {
|
398
|
String result = famStr.substring(0,1) + famStr.substring(1).toLowerCase();
|
399
|
return result;
|
400
|
}
|
401
|
|
402
|
|
403
|
protected Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state) {
|
404
|
return state.getConfig().getSecReference();
|
405
|
}
|
406
|
|
407
|
/**
|
408
|
* @param state
|
409
|
* @return
|
410
|
*/
|
411
|
protected Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
|
412
|
return state.getConfig().getSourceReference();
|
413
|
}
|
414
|
|
415
|
/**
|
416
|
* @param state
|
417
|
* @param parentStr
|
418
|
* @return
|
419
|
*/
|
420
|
private TaxonNode getParent(SimpleExcelTaxonImportState<CONFIG> state, String parentStr) {
|
421
|
Taxon taxon = state.getHigherTaxon(parentStr);
|
422
|
|
423
|
return taxon == null ? null : taxon.getTaxonNodes().iterator().next();
|
424
|
}
|
425
|
|
426
|
/**
|
427
|
* @param state
|
428
|
* @param record
|
429
|
* @param genusStr
|
430
|
* @return
|
431
|
*/
|
432
|
private TaxonNode makeGenusNode(SimpleExcelTaxonImportState<CONFIG> state,
|
433
|
HashMap<String, String> record, String genusStr) {
|
434
|
IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
|
435
|
name.setGenusOrUninomial(genusStr);
|
436
|
Taxon genus = Taxon.NewInstance(name, getSecReference(state));
|
437
|
TaxonNode family = getFamilyTaxon(record, state);
|
438
|
TaxonNode genusNode = family.addChildTaxon(genus, getSecReference(state), null);
|
439
|
state.putHigherTaxon(genusStr, genus);
|
440
|
genus.addSource(makeOriginalSource(state));
|
441
|
getTaxonNodeService().save(genusNode);
|
442
|
return genusNode;
|
443
|
}
|
444
|
|
445
|
}
|