1
|
/**
* Copyright (C) 2020 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
|
9
|
package eu.etaxonomy.cdm.io.casearia;
|
10
|
|
11
|
import java.util.ArrayList;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.HashSet;
|
14
|
import java.util.List;
|
15
|
import java.util.Map;
|
16
|
import java.util.Set;
|
17
|
import java.util.UUID;
|
18
|
|
19
|
import org.apache.log4j.Logger;
|
20
|
import org.springframework.stereotype.Component;
|
21
|
|
22
|
import eu.etaxonomy.cdm.api.service.config.SynonymDeletionConfigurator;
|
23
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
24
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
25
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
26
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
27
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
28
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
29
|
import eu.etaxonomy.cdm.model.common.Language;
|
30
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
31
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
32
|
import eu.etaxonomy.cdm.model.name.Rank;
|
33
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
34
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
35
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
36
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
37
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
38
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
39
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
40
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
41
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
42
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
43
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
44
|
import eu.etaxonomy.cdm.model.term.DefinedTerm;
|
45
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
46
|
|
47
|
/**
 * Taxon import for Casearia from the Kew world checklist of plants.
 *
 * @author a.mueller
 * @since 12.05.2020
 */
|
53
|
@Component
|
54
|
public class CaseariaTaxonImport extends SimpleExcelTaxonImport<CaseariaImportConfigurator>{
|
55
|
|
56
|
private static final long serialVersionUID = 7686154384296707819L;
|
57
|
private static final Logger logger = Logger.getLogger(CaseariaTaxonImport.class);
|
58
|
|
59
|
protected static final String TAXON_MAPPING = "TaxonMapping";
|
60
|
private static final String NAME_CIT = "NameCit";
|
61
|
private static final String IPNI_ID = "ipni_id";
|
62
|
private static final String PLANT_NAME_ID = "plant_name_id";
|
63
|
private static final String TAXON_RANK = "taxon_rank";
|
64
|
private static final String TAXON_STATUS = "taxon_status";
|
65
|
private static final String NOMENCLATURAL_REMARKS = "nomenclatural_remarks";
|
66
|
private static final String ACCEPTED_PLANT_NAME_ID = "accepted_plant_name_id";
|
67
|
private static final String TAXON_NAME = "taxon_name";
|
68
|
private static final String TAXON_AUTHORS = "taxon_authors";
|
69
|
private static final String FAMILY = "family";
|
70
|
private static final String FIRST_PUBLISHED = "first_published";
|
71
|
private static final String PUB_TYPE = "PubType";
|
72
|
private static final String VOLUME_AND_PAGE = "volume_and_page";
|
73
|
private static final String PLACE_OF_PUBLICATION = "place_of_publication";
|
74
|
private static final String PRIMARY_AUTHOR = "primary_author";
|
75
|
private static final String PARENTHETICAL_AUTHOR = "parenthetical_author";
|
76
|
private static final String INFRASPECIFIC_RANK = "infraspecific_rank";
|
77
|
private static final String INFRASPECIES = "infraspecies";
|
78
|
private static final String SPECIES = "species";
|
79
|
private static final String GENUS = "genus";
|
80
|
|
81
|
private static final int RECORDS_PER_TRANSACTION = 500;
|
82
|
private static boolean logMissingIpniId = false;
|
83
|
|
84
|
private Map<String, UUID> taxonMapping = new HashMap<>();
|
85
|
private Reference secRef = null;
|
86
|
private Set<UUID> createdNames = new HashSet<>();
|
87
|
|
88
|
private SimpleExcelTaxonImportState<CaseariaImportConfigurator> state;
|
89
|
private NonViralNameParserImpl parser = new NonViralNameParserImpl();
|
90
|
|
91
|
|
92
|
@Override
|
93
|
protected void firstPass(SimpleExcelTaxonImportState<CaseariaImportConfigurator> state) {
|
94
|
int line = state.getCurrentLine();
|
95
|
if ((line % RECORDS_PER_TRANSACTION) == 0){
|
96
|
newTransaction(state);
|
97
|
System.out.println(line);
|
98
|
}
|
99
|
|
100
|
this.state = state;
|
101
|
Map<String, String> record = state.getOriginalRecord();
|
102
|
|
103
|
String fullCitation = getValue(record, NAME_CIT);
|
104
|
String ipniId = getValue(record, IPNI_ID);
|
105
|
String sourceId = getValue(record, PLANT_NAME_ID);
|
106
|
String rankStr = getValue(record, TAXON_RANK);
|
107
|
String status = getValue(record, TAXON_STATUS);
|
108
|
String nomenclaturalRemarks = getValue(record, NOMENCLATURAL_REMARKS);
|
109
|
String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
|
110
|
String taxonNameStr = getValue(record, TAXON_NAME);
|
111
|
String taxonAuthors = getValue(record, TAXON_AUTHORS);
|
112
|
|
113
|
String fullNameStr = CdmUtils.concat(" ", taxonNameStr,taxonAuthors);
|
114
|
String row = String.valueOf(line) + "("+fullNameStr+"): ";
|
115
|
|
116
|
boolean isNewName = true;
|
117
|
|
118
|
try {
|
119
|
|
120
|
List<NomenclaturalStatusType> statusTypes = new ArrayList<>();
|
121
|
Class<? extends CdmBase> taxonClazz = makeStatus(status, sourceId, accId, row, statusTypes);
|
122
|
|
123
|
|
124
|
Rank rank = state.getTransformer().getRankByKey(rankStr);
|
125
|
|
126
|
TaxonName name = parser.parseReferencedName(fullCitation, state.getConfig().getNomenclaturalCode(), rank);
|
127
|
if (name.isProtectedFullTitleCache() || name.isProtectedTitleCache() || name.isProtectedNameCache()
|
128
|
|| name.isProtectedAuthorshipCache()){
|
129
|
logger.warn(row + "Name not parsable: " + fullCitation);
|
130
|
name.setTitleCache(fullNameStr, true);
|
131
|
name.setNameCache(taxonNameStr, true);
|
132
|
}else{
|
133
|
testParsedName(state, name, row, null);
|
134
|
}
|
135
|
name.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
|
136
|
name = dedupliateNameParts(name);
|
137
|
getNameService().saveOrUpdate(name);
|
138
|
createdNames.add(name.getUuid());
|
139
|
|
140
|
handleNomenclRemarkAndNameStatus(nomenclaturalRemarks, row, isNewName, name, statusTypes);
|
141
|
|
142
|
TaxonBase<?> taxonBase;
|
143
|
if (taxonClazz == Taxon.class){
|
144
|
taxonBase = Taxon.NewInstance(name, getSecRef());
|
145
|
}else{
|
146
|
taxonBase = Synonym.NewInstance(name, getSecRef());
|
147
|
}
|
148
|
taxonBase.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
|
149
|
getTaxonService().saveOrUpdate(taxonBase);
|
150
|
|
151
|
if (!isBlank(ipniId)){
|
152
|
DefinedTerm ipniIdIdentifierType = DefinedTerm.IDENTIFIER_NAME_IPNI();
|
153
|
name.addIdentifier(ipniId, ipniIdIdentifierType);
|
154
|
}else{
|
155
|
if(logMissingIpniId){
|
156
|
logger.warn(row + "IPNI id is missing.");
|
157
|
}
|
158
|
}
|
159
|
|
160
|
UUID uuid = taxonMapping.put(sourceId, taxonBase.getUuid());{
|
161
|
if (uuid != null){
|
162
|
logger.warn(row + "sourceId already existed in taxonMapping: " + sourceId);
|
163
|
}
|
164
|
}
|
165
|
if(taxonBase.isInstanceOf(Taxon.class)){
|
166
|
UUID existingUuid = taxonMapping.put(name.getNameCache(), taxonBase.getUuid());
|
167
|
if (existingUuid != null){
|
168
|
logger.warn(row + name.getNameCache() + " has multiple instances in file");
|
169
|
}
|
170
|
}
|
171
|
} catch (UndefinedTransformerMethodException e) {
|
172
|
e.printStackTrace();
|
173
|
}
|
174
|
}
|
175
|
|
176
|
private TaxonName dedupliateNameParts(TaxonName name) {
|
177
|
if (state.getConfig().isDoDeduplicate()){
|
178
|
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
|
179
|
}
|
180
|
return name;
|
181
|
}
|
182
|
|
183
|
private Class<? extends CdmBase> makeStatus(String status, String sourceId,
|
184
|
String accId, String row, List<NomenclaturalStatusType> statusTypes) {
|
185
|
|
186
|
Class<? extends CdmBase> clazz;
|
187
|
if ("Accepted".equals(status) || "Unplaced".equals(status) || "Misapplied".equals(status)){
|
188
|
clazz = Taxon.class;
|
189
|
}else if ("Synonym".equals(status)){
|
190
|
clazz = (accId == null)? Taxon.class : Synonym.class;
|
191
|
}else if("Illegitimate".equals(status)){
|
192
|
clazz = getIllegInvalidStatus(sourceId, accId);
|
193
|
statusTypes.add(NomenclaturalStatusType.ILLEGITIMATE());
|
194
|
}else if ("Invalid".equals(status)){
|
195
|
clazz = getIllegInvalidStatus(sourceId, accId);
|
196
|
statusTypes.add(NomenclaturalStatusType.INVALID());
|
197
|
}else{
|
198
|
logger.warn(row + "Unhandled status: " + status);
|
199
|
clazz = Taxon.class; //to do something
|
200
|
}
|
201
|
return clazz;
|
202
|
}
|
203
|
|
204
|
private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name,
|
205
|
List<NomenclaturalStatusType> statusTypes) {
|
206
|
|
207
|
NomenclaturalStatusType remarkType = null;
|
208
|
NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next();
|
209
|
if (nomenclaturalRemarks == null){
|
210
|
//nothing to do
|
211
|
}else if (", nom. illeg.".equals(nomenclaturalRemarks)){
|
212
|
remarkType = NomenclaturalStatusType.ILLEGITIMATE();
|
213
|
}else if (", nom. cons.".equals(nomenclaturalRemarks)){
|
214
|
remarkType = NomenclaturalStatusType.CONSERVED();
|
215
|
}else if (", nom. nud.".equals(nomenclaturalRemarks)){
|
216
|
remarkType = NomenclaturalStatusType.NUDUM();
|
217
|
}else if (", nom. provis.".equals(nomenclaturalRemarks)){
|
218
|
remarkType = NomenclaturalStatusType.PROVISIONAL();
|
219
|
}else if (", nom. rej.".equals(nomenclaturalRemarks)){
|
220
|
remarkType = NomenclaturalStatusType.REJECTED();
|
221
|
}else if (", nom. subnud.".equals(nomenclaturalRemarks)){
|
222
|
remarkType = NomenclaturalStatusType.SUBNUDUM();
|
223
|
}else if (", nom. superfl.".equals(nomenclaturalRemarks)){
|
224
|
remarkType = NomenclaturalStatusType.SUPERFLUOUS();
|
225
|
}else if (", not validly publ.".equals(nomenclaturalRemarks)){
|
226
|
statusTypes.add(NomenclaturalStatusType.INVALID());
|
227
|
}else if (", opus utique oppr.".equals(nomenclaturalRemarks)){
|
228
|
statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR());
|
229
|
}else {
|
230
|
logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks);
|
231
|
}
|
232
|
|
233
|
NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType;
|
234
|
if (isNewName){
|
235
|
if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
|
236
|
logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken.");
|
237
|
}
|
238
|
if (kewType != null){
|
239
|
name.addStatus(kewType, getSecRef(), null);
|
240
|
}
|
241
|
}else{
|
242
|
NomenclaturalStatusType existingType = null;
|
243
|
if (!name.getStatus().isEmpty()){
|
244
|
existingType = name.getStatus().iterator().next().getType();
|
245
|
}
|
246
|
if (existingType != null && kewType != null){
|
247
|
if (!existingType.equals(kewType)){
|
248
|
logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored");
|
249
|
}
|
250
|
}else if (existingType != null && kewType == null){
|
251
|
logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept.");
|
252
|
}else if (existingType == null && kewType != null){
|
253
|
if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
|
254
|
logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field).");
|
255
|
}else{
|
256
|
logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored.");
|
257
|
}
|
258
|
}
|
259
|
}
|
260
|
}
|
261
|
|
262
|
private void newTransaction(SimpleExcelTaxonImportState<CaseariaImportConfigurator> state) {
|
263
|
commitTransaction(state.getTransactionStatus());
|
264
|
secRef = null;
|
265
|
state.getDeduplicationHelper().reset();
|
266
|
state.setSourceReference(null);
|
267
|
System.gc();
|
268
|
state.setTransactionStatus(startTransaction());
|
269
|
}
|
270
|
|
271
|
private Reference getSecRef() {
|
272
|
if (secRef == null){
|
273
|
secRef = getReferenceService().find(state.getConfig().getSecUuid());
|
274
|
if (secRef == null){
|
275
|
secRef = ReferenceFactory.newDatabase();
|
276
|
secRef.setTitle("Casearia Database");
|
277
|
}
|
278
|
}
|
279
|
return secRef;
|
280
|
}
|
281
|
|
282
|
private Class<? extends CdmBase> getIllegInvalidStatus(String sourceId, String accId) {
|
283
|
if (sourceId.equals(accId)){
|
284
|
return Taxon.class;
|
285
|
}else if(accId != null){
|
286
|
return Synonym.class;
|
287
|
}
|
288
|
return null;
|
289
|
}
|
290
|
|
291
|
|
292
|
private void testParsedName(SimpleExcelTaxonImportState<CaseariaImportConfigurator> state, TaxonName name,
|
293
|
String row, String fullCitation) throws UndefinedTransformerMethodException {
|
294
|
|
295
|
Map<String, String> record = state.getOriginalRecord();
|
296
|
|
297
|
// publication_author
|
298
|
|
299
|
String rankStr = getValue(record, TAXON_RANK);
|
300
|
String nameCache = getValue(record, TAXON_NAME);
|
301
|
String authorshipCache = getValue(record, TAXON_AUTHORS);
|
302
|
String genus = getValue(record, GENUS);
|
303
|
String species = getValue(record, SPECIES);
|
304
|
String infraspecies = getValue(record, INFRASPECIES);
|
305
|
String infraSpecRank = getValue(record, INFRASPECIFIC_RANK);
|
306
|
String basionymAuthor = getValue(record, PARENTHETICAL_AUTHOR);
|
307
|
String combinationAuthor = getValue(record, PRIMARY_AUTHOR);
|
308
|
String place_of_publication = getValue(record, PLACE_OF_PUBLICATION);
|
309
|
String volume_and_page = getValue(record, VOLUME_AND_PAGE);
|
310
|
String pubType = getValue(record, PUB_TYPE);
|
311
|
String yearPublished = getValue(record, FIRST_PUBLISHED);
|
312
|
|
313
|
String fullName = CdmUtils.concat(" ", nameCache, authorshipCache);
|
314
|
|
315
|
if (!CdmUtils.nullSafeEqual(name.getNameCache(), nameCache)){
|
316
|
logger.warn(row + "Unexpected nameCache: " + nameCache);
|
317
|
}
|
318
|
if (!CdmUtils.nullSafeEqual(name.getTitleCache(), fullName)){
|
319
|
logger.warn(row + "Unexpected titleCache: <->" + name.getTitleCache());
|
320
|
}
|
321
|
if (!CdmUtils.nullSafeEqual(name.getGenusOrUninomial(),genus)){
|
322
|
logger.warn(row + "Unexpected genus: " + genus);
|
323
|
}
|
324
|
if (!CdmUtils.nullSafeEqual(name.getSpecificEpithet(), species)){
|
325
|
logger.warn(row + "Unexpected species epithet: " + name.getSpecificEpithet() +"<->"+ species);
|
326
|
}
|
327
|
if (!CdmUtils.nullSafeEqual(name.getInfraSpecificEpithet(), infraspecies)){
|
328
|
logger.warn(row + "Unexpected infraspecific epithet: " + name.getInfraSpecificEpithet() +"<->"+ infraspecies);
|
329
|
}
|
330
|
if (!CdmUtils.nullSafeEqual(name.getAuthorshipCache(), authorshipCache)){
|
331
|
logger.warn(row + "Unexpected authors: " + name.getAuthorshipCache() +"<->"+ authorshipCache);
|
332
|
}
|
333
|
String combinationAndExAuthor = authorTitle(name.getCombinationAuthorship(), name.getExCombinationAuthorship());
|
334
|
if (!CdmUtils.nullSafeEqual(combinationAndExAuthor, combinationAuthor)){
|
335
|
logger.warn(row + "Unexpected combination author: " + combinationAndExAuthor +"<->"+ combinationAuthor);
|
336
|
}
|
337
|
String basionymAndExAuthor = authorTitle(name.getBasionymAuthorship(), name.getExBasionymAuthorship());
|
338
|
if (!CdmUtils.nullSafeEqual(basionymAndExAuthor, basionymAuthor)){
|
339
|
logger.warn(row + "Unexpected basionym author: " + basionymAndExAuthor +"<->"+ basionymAuthor);
|
340
|
}
|
341
|
Rank rank = state.getTransformer().getRankByKey(rankStr);
|
342
|
if (!rank.equals(name.getRank())){
|
343
|
logger.warn(row + "Unexpected rank: " + rankStr);
|
344
|
}
|
345
|
|
346
|
Reference nomRef = name.getNomenclaturalReference();
|
347
|
if (nomRef == null){
|
348
|
if (fullCitation != null){
|
349
|
NonViralNameParserImpl parser = new NonViralNameParserImpl();
|
350
|
TaxonName parsedName = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
|
351
|
if (parsedName.getNomenclaturalReference() != null){
|
352
|
name.setNomenclaturalReference(parsedName.getNomenclaturalReference());
|
353
|
logger.warn(row + "Nom.ref. was missing. Taken from Kew");
|
354
|
}else{
|
355
|
logger.warn(row + "Nom. ref. is missing or can not be parsed");
|
356
|
}
|
357
|
}else{
|
358
|
logger.warn(row + "NomRef is missing.");
|
359
|
}
|
360
|
}else{
|
361
|
if ("A".equals(pubType) && nomRef.getType() != ReferenceType.Article){
|
362
|
logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
|
363
|
}
|
364
|
if ("B".equals(pubType) && nomRef.getType() != ReferenceType.Book){
|
365
|
logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
|
366
|
}
|
367
|
String year = normalizeYear(yearPublished);
|
368
|
if (!CdmUtils.nullSafeEqual(year, nomRef.getDatePublishedString())){
|
369
|
logger.warn(row + "Unexpected year: " + year + "<->" + nomRef.getDatePublishedString());
|
370
|
}
|
371
|
if (volume_and_page != null && !name.getFullTitleCache().contains(volume_and_page)){
|
372
|
logger.warn(row + "volume_and_page not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ volume_and_page);
|
373
|
}
|
374
|
if (place_of_publication != null && !name.getFullTitleCache().contains(place_of_publication)){
|
375
|
logger.warn(row + "place_of_publication not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ place_of_publication);
|
376
|
}
|
377
|
}
|
378
|
if (isBlank(infraSpecRank)){
|
379
|
if (rank.isLower(Rank.SPECIES())){
|
380
|
logger.warn(row + "No infraspce marker given but rank is lower than species");
|
381
|
}
|
382
|
}else if ("subsp.".equals(infraSpecRank)){
|
383
|
if(!rank.equals(Rank.SUBSPECIES())){
|
384
|
logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
|
385
|
}
|
386
|
}else if ("var.".equals(infraSpecRank)){
|
387
|
if (!rank.equals(Rank.VARIETY())){
|
388
|
logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
|
389
|
}
|
390
|
}else if ("subvar.".equals(infraSpecRank)){
|
391
|
if (!rank.equals(Rank.SUBVARIETY())){
|
392
|
logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
|
393
|
}
|
394
|
}else if ("f.".equals(infraSpecRank)){
|
395
|
if (!rank.equals(Rank.FORM())){
|
396
|
logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
|
397
|
}
|
398
|
}else{
|
399
|
logger.warn(row + "Unhandled infraspec marker: " + infraSpecRank);
|
400
|
}
|
401
|
}
|
402
|
|
403
|
private String authorTitle(TeamOrPersonBase<?> author, TeamOrPersonBase<?> exAuthor) {
|
404
|
String authorStr = author == null? null: author.getNomenclaturalTitleCache();
|
405
|
String exAuthorStr = exAuthor == null? null: exAuthor.getNomenclaturalTitleCache();
|
406
|
return CdmUtils.concat(" ex ", exAuthorStr, authorStr);
|
407
|
}
|
408
|
|
409
|
private String normalizeYear(String year) {
|
410
|
if (year == null){
|
411
|
return null;
|
412
|
}else{
|
413
|
year = year.substring(1, year.length() - 1);
|
414
|
}
|
415
|
if (year.contains("\" [")){
|
416
|
String[] split = year.split("\" \\[");
|
417
|
year = split[1].replace("]","") + " [" + split[0]+"\"]";
|
418
|
}else if ("?".equals(year)){
|
419
|
return null;
|
420
|
}
|
421
|
return year;
|
422
|
}
|
423
|
|
424
|
@Override
|
425
|
protected void secondPass(SimpleExcelTaxonImportState<CaseariaImportConfigurator> state) {
|
426
|
state.putStatusItem(TAXON_MAPPING, taxonMapping);
|
427
|
|
428
|
|
429
|
Map<String, String> record = state.getOriginalRecord();
|
430
|
int line = state.getCurrentLine();
|
431
|
// String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
|
432
|
String status = getValue(record, TAXON_STATUS);
|
433
|
String sourceId = getValue(record, PLANT_NAME_ID);
|
434
|
String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
|
435
|
String family = getValue(record, FAMILY);
|
436
|
|
437
|
String accName = getValue(record, "AcceptedName");
|
438
|
String basionymId = getValue(record, "basionym_plant_name_id");
|
439
|
String homotypicSynonym = getValue(record, "homotypic_synonym");
|
440
|
|
441
|
// AcceptedName, Basionym, taxon_name_hybcorr, genus_hybrid, species_hybrid, homotypic_synonym,
|
442
|
// basionym_plant_name_id
|
443
|
|
444
|
String taxonNameStr = getValue(record, TAXON_NAME);
|
445
|
String taxonAuthors = getValue(record, TAXON_AUTHORS);
|
446
|
String fullNameStr = CdmUtils.concat(" ", taxonNameStr,taxonAuthors);
|
447
|
String row = String.valueOf(line) + "("+fullNameStr+"): ";
|
448
|
|
449
|
try {
|
450
|
if ((line % RECORDS_PER_TRANSACTION) == 0){
|
451
|
newTransaction(state);
|
452
|
System.out.println(line);
|
453
|
}
|
454
|
|
455
|
UUID uuid = taxonMapping.get(sourceId);
|
456
|
TaxonBase<?> taxonBase = getTaxonService().find(uuid);
|
457
|
if (taxonBase == null){
|
458
|
logger.warn(row + "taxonBase not found: " + sourceId);
|
459
|
return;
|
460
|
}
|
461
|
|
462
|
UUID accUuid = taxonMapping.get(accId);
|
463
|
boolean hasAccepted = !sourceId.equals(accId);
|
464
|
|
465
|
Taxon accTaxon = null;
|
466
|
TaxonNode parent = null;
|
467
|
Taxon child = null;
|
468
|
Synonym syn = null;
|
469
|
boolean isSynonymAccepted = false;
|
470
|
|
471
|
if(accId == null){
|
472
|
logger.info(row + "accID is null");
|
473
|
child = CdmBase.deproxy(taxonBase, Taxon.class);
|
474
|
//synonyms
|
475
|
}else if(hasAccepted){
|
476
|
TaxonBase<?> accTaxonBase = getTaxonService().find(accUuid);
|
477
|
if (accTaxonBase == null){
|
478
|
// logger.warn(row + "acctaxon not found: " + accId + "; " + accName);
|
479
|
}else if(!accTaxonBase.isInstanceOf(Taxon.class)){
|
480
|
logger.warn(row + "acctaxon is synonym: " + accId + "; " + accName);
|
481
|
isSynonymAccepted = true;
|
482
|
}else{
|
483
|
accTaxon = CdmBase.deproxy(accTaxonBase, Taxon.class);
|
484
|
if (!accTaxon.getName().getTitleCache().equals(accName)){
|
485
|
logger.warn(row + "Accepted name differs: " + accName +" <-> "+ accTaxon.getName().getTitleCache());
|
486
|
}
|
487
|
}
|
488
|
//accepted taxa
|
489
|
}else if (sourceId.equals(accId)){
|
490
|
if (!taxonBase.isInstanceOf(Taxon.class)){
|
491
|
logger.warn(row + "child not of class Taxon: " + sourceId);
|
492
|
}else{
|
493
|
Rank rank = taxonBase.getName().getRank();
|
494
|
child = CdmBase.deproxy(taxonBase, Taxon.class);
|
495
|
if(rank.equals(Rank.GENUS())){
|
496
|
parent = getFamily(row, family);
|
497
|
}else if (rank.equals(Rank.SPECIES())){
|
498
|
String genus = child.getName().getGenusOrUninomial();
|
499
|
UUID parentUuid = taxonMapping.get(genus);
|
500
|
parent = getParent(parentUuid, row);
|
501
|
}else if (rank.isLower(Rank.SPECIES())){
|
502
|
String speciesName = child.getName().getGenusOrUninomial() + " " + child.getName().getSpecificEpithet();
|
503
|
UUID parentUuid = taxonMapping.get(speciesName);
|
504
|
parent = getParent(parentUuid, row);
|
505
|
}
|
506
|
}
|
507
|
}
|
508
|
|
509
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
510
|
syn = CdmBase.deproxy(taxonBase, Synonym.class);
|
511
|
}
|
512
|
|
513
|
if ("Accepted".equals(status)){
|
514
|
if (parent == null){
|
515
|
logger.warn(row + "Parent is missing. Taxon is moved to 'unresolved' instead'");
|
516
|
parent = unresolvedParent();
|
517
|
}
|
518
|
if (child == null){
|
519
|
logger.warn(row + "Child is missing. Taxon not imported.");
|
520
|
}else{
|
521
|
if (!child.getTaxonNodes().isEmpty()){
|
522
|
if(!child.getName().getRank().equals(Rank.GENUS())){
|
523
|
logger.warn(row + "Taxon already has a parent. Taxon not attached to any further parent taxon.");
|
524
|
}
|
525
|
}else{
|
526
|
addChild(parent, child, row);
|
527
|
}
|
528
|
}
|
529
|
}else if ("Synonym".equals(status)){
|
530
|
if(accTaxon == null){
|
531
|
if(isSynonymAccepted){
|
532
|
logger.warn(row + "Synonym added to 'unresolved' as accepted taxon is synonym itself.");
|
533
|
}else if (accId != null){
|
534
|
logger.warn(row + "Accepted taxon "+accName+" for synonym unexpectedly does not exist. Synonym was moved to 'unresolved'");
|
535
|
}else{
|
536
|
logger.warn(row + "No accepted taxon given for synonym. Therefore taxon was moved to 'unresolved'");
|
537
|
}
|
538
|
if(accId != null){
|
539
|
child = Taxon.NewInstance(syn.getName(), syn.getSec());
|
540
|
taxonMapping.put(sourceId, child.getUuid());
|
541
|
child.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
|
542
|
}
|
543
|
addChild(unresolvedParent(), child, row);
|
544
|
getTaxonService().deleteSynonym(syn, new SynonymDeletionConfigurator());
|
545
|
}else{
|
546
|
accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
|
547
|
}
|
548
|
}else if ("Misapplied".equals(status)){
|
549
|
Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
|
550
|
if(accTaxon == null){
|
551
|
if(isSynonymAccepted){
|
552
|
logger.warn(row + "Misapplication added to 'unresolved' as accepted taxon is synonym itself.");
|
553
|
}else if (accId != null){
|
554
|
logger.warn(row + "Accepted taxon "+accName+" for misapplication unexpectedly does not exist. Misapplication was moved to 'unresolved'");
|
555
|
}else{
|
556
|
logger.warn(row + "No accepted taxon given for misapplication. Therefore taxon was moved to 'unresolved'");
|
557
|
}
|
558
|
addChild(unresolvedParent(), taxon, row);
|
559
|
}else{
|
560
|
accTaxon.addMisappliedName(taxon, null, null);
|
561
|
}
|
562
|
}else if ("Unplaced".equals(status)){
|
563
|
parent = unresolvedParent();
|
564
|
addChild(parent, child, row);
|
565
|
}else if("Illegitimate".equals(status) || "Invalid".equals(status)){
|
566
|
if (hasAccepted){
|
567
|
if(accTaxon == null){
|
568
|
logger.warn(row + "accepted taxon for illegitimate or invalid taxon not found. Illeg/inval taxon was moved to 'unresolved'");
|
569
|
child = Taxon.NewInstance(syn.getName(), syn.getSec());
|
570
|
addChild(unresolvedParent(), child, row);
|
571
|
}else{
|
572
|
accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
|
573
|
}
|
574
|
}else{
|
575
|
addChild(unresolvedParent(), child, row);
|
576
|
}
|
577
|
}else{
|
578
|
logger.warn(row + "Unhandled status: " + status);
|
579
|
}
|
580
|
|
581
|
if (basionymId != null && false){
|
582
|
UUID basionymUuid = taxonMapping.get(basionymId);
|
583
|
TaxonBase<?> basionymTaxon = getTaxonService().find(basionymUuid);
|
584
|
if (basionymTaxon != null){
|
585
|
if (hasSameAcceptedTaxon(taxonBase, basionymTaxon)){
|
586
|
if (taxonBase.getName().getBasionym() == null){
|
587
|
taxonBase.getName().addBasionym(basionymTaxon.getName());
|
588
|
}
|
589
|
}else{
|
590
|
logger.warn(row + "Basionym has not same accepted taxon and therefore was ignored.");
|
591
|
}
|
592
|
}else{
|
593
|
logger.warn(row + "Basionym "+basionymId+" not found.");
|
594
|
}
|
595
|
}
|
596
|
} catch (Exception e) {
|
597
|
logger.error(row + "Error.");
|
598
|
e.printStackTrace();
|
599
|
}
|
600
|
}
|
601
|
|
602
|
private boolean hasSameAcceptedTaxon(TaxonBase<?> taxonBase, TaxonBase<?> basionymTaxon) {
|
603
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
604
|
taxonBase = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
|
605
|
}
|
606
|
if (basionymTaxon.isInstanceOf(Synonym.class)){
|
607
|
basionymTaxon = CdmBase.deproxy(basionymTaxon, Synonym.class).getAcceptedTaxon();
|
608
|
}
|
609
|
return taxonBase != null && basionymTaxon != null && taxonBase.equals(basionymTaxon);
|
610
|
}
|
611
|
|
612
|
private TaxonNode getParent(UUID parentUuid, String row) {
|
613
|
if(parentUuid == null){
|
614
|
logger.warn(row + "Parent uuid is null. No parent found.");
|
615
|
return null;
|
616
|
}
|
617
|
TaxonBase<?> pTaxon = getTaxonService().find(parentUuid);
|
618
|
if (pTaxon == null){
|
619
|
logger.warn(row + "No parent found for parent UUID. This should not happen.");
|
620
|
return null;
|
621
|
}
|
622
|
if (pTaxon.isInstanceOf(Synonym.class)){
|
623
|
logger.warn(row + "Parent is synonym");
|
624
|
return null;
|
625
|
}else{
|
626
|
Taxon ptax = CdmBase.deproxy(pTaxon, Taxon.class);
|
627
|
if(ptax.getTaxonNodes().isEmpty()){
|
628
|
logger.info(row + "Parent has no node yet");
|
629
|
TaxonNode newParent = getClassification().addChildTaxon(ptax, null, null);
|
630
|
getTaxonNodeService().saveOrUpdate(newParent);
|
631
|
return newParent;
|
632
|
}else {
|
633
|
if(ptax.getTaxonNodes().size()>1){
|
634
|
logger.warn("Parent has >1 nodes. Take arbitrary one");
|
635
|
}
|
636
|
return ptax.getTaxonNodes().iterator().next();
|
637
|
}
|
638
|
}
|
639
|
}
|
640
|
|
641
|
private void addChild(TaxonNode parent, Taxon child, String row) {
|
642
|
if (parent == null){
|
643
|
logger.warn(row + "Parent is null");
|
644
|
}else if (child == null){
|
645
|
logger.warn(row + "Child is null");
|
646
|
}else{
|
647
|
if (!child.getTaxonNodes().isEmpty()){
|
648
|
TaxonNode childNode = child.getTaxonNodes().iterator().next();
|
649
|
if (childNode.getParent() != null && childNode.getParent().equals(parent)){
|
650
|
logger.info(row + "Parent-child relation exists already.");
|
651
|
}else{
|
652
|
logger.warn(row + "Child already has different parent. Parent-child relation not added.");
|
653
|
}
|
654
|
}else{
|
655
|
TaxonNode node = parent.addChildTaxon(child, null, null);
|
656
|
getTaxonNodeService().saveOrUpdate(node);
|
657
|
}
|
658
|
}
|
659
|
}
|
660
|
|
661
|
private TaxonNode getFamily(String line, String family){
|
662
|
UUID uuid;
|
663
|
if ("Salicaceae".equals(family)){
|
664
|
uuid = UUID.fromString("5432a4eb-2fbe-4494-925d-d01743ed435f");
|
665
|
// }else if ("Meliaceae".equals(family)){
|
666
|
// //Note: not needed, genus with family Meliaceae is synonym
|
667
|
// uuid = UUID.fromString("c8694910-bfec-45a1-8901-2a0a2a6f12b1");
|
668
|
}else{
|
669
|
logger.warn(line + "Family not yet handled: " + family);
|
670
|
return null;
|
671
|
}
|
672
|
TaxonNode familyNode = getTaxonNodeService().find(uuid);
|
673
|
if (familyNode == null){
|
674
|
familyNode = createFamily(family, uuid);
|
675
|
}
|
676
|
return familyNode;
|
677
|
}
|
678
|
|
679
|
private TaxonNode createFamily(String family, UUID uuid) {
|
680
|
Classification classification = getClassification();
|
681
|
TaxonName name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
|
682
|
name.setGenusOrUninomial(family);
|
683
|
Taxon taxon = Taxon.NewInstance(name, getSecRef());
|
684
|
TaxonNode result = classification.addChildTaxon(taxon, null, null);
|
685
|
result.setUuid(uuid);
|
686
|
getTaxonNodeService().saveOrUpdate(result);
|
687
|
return result;
|
688
|
}
|
689
|
|
690
|
private Classification getClassification() {
|
691
|
Classification classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
692
|
if (classification == null){
|
693
|
classification = Classification.NewInstance(
|
694
|
state.getConfig().getClassificationName(), getSecRef(), Language.LATIN());
|
695
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
696
|
getClassificationService().save(classification);
|
697
|
}
|
698
|
return classification;
|
699
|
}
|
700
|
|
701
|
private TaxonNode unresolvedParent(){
|
702
|
UUID uuid = UUID.fromString("1c48b8d3-077d-4aef-9e41-d4d3e0abd4c7");
|
703
|
TaxonNode unresolvedParent = getTaxonNodeService().find(uuid);
|
704
|
if (unresolvedParent == null){
|
705
|
unresolvedParent = createFamily("Unresolved", uuid);
|
706
|
}
|
707
|
return unresolvedParent;
|
708
|
}
|
709
|
}
|