1
|
/**
|
2
|
* Copyright (C) 2016 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.caryo;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
import java.util.UUID;
|
17
|
import java.util.regex.Matcher;
|
18
|
import java.util.regex.Pattern;
|
19
|
|
20
|
import org.apache.commons.lang3.StringUtils;
|
21
|
import org.apache.logging.log4j.LogManager;
|
22
|
import org.apache.logging.log4j.Logger;
|
23
|
import org.springframework.stereotype.Component;
|
24
|
import org.springframework.transaction.TransactionStatus;
|
25
|
|
26
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
27
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
28
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
29
|
import eu.etaxonomy.cdm.model.agent.Person;
|
30
|
import eu.etaxonomy.cdm.model.agent.Team;
|
31
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
32
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
33
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
34
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
35
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
36
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
37
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
38
|
import eu.etaxonomy.cdm.model.name.Rank;
|
39
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
40
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
41
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
42
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
43
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
44
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
45
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
46
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
47
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
48
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
49
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
50
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
51
|
|
52
|
/**
|
53
|
* Kew excel taxon import for Caryophyllaceae.
|
54
|
*
|
55
|
* @author a.mueller
|
56
|
* @since 05.01.2022
|
57
|
*/
|
58
|
@Component
|
59
|
public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
|
60
|
extends SimpleExcelTaxonImport<CONFIG>{
|
61
|
|
62
|
private static final long serialVersionUID = 1081966876789613803L;
|
63
|
private static final Logger logger = LogManager.getLogger();
|
64
|
|
65
|
private static final String NO_SIMPLE_DIFF = "xxxxx";
|
66
|
|
67
|
private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
|
68
|
private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
|
69
|
private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
|
70
|
|
71
|
private static final String CDM_Name_UUID = "CDM-Name_UUID";
|
72
|
private static final String Kew_Name_ID = "Kew-Name-ID";
|
73
|
private static final String Kew_Name_Citation = "Kew-Name-Citation";
|
74
|
private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
|
75
|
private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
|
76
|
private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
|
77
|
private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
|
78
|
private static final String GENUS_HYBRID = "genus_hybrid";
|
79
|
private static final String GENUS = "genus";
|
80
|
private static final String SPECIES_HYBRID = "species_hybrid";
|
81
|
private static final String SPECIES = "species";
|
82
|
|
83
|
private static final String infraspecific_rank = "infraspecific_rank";
|
84
|
private static final String infraspecies = "infraspecies";
|
85
|
|
86
|
private static final String parenthetical_author = "parenthetical_author";
|
87
|
private static final String primary_author = "primary_author";
|
88
|
private static final String publication_author = "publication_author";
|
89
|
private static final String place_of_publication = "place_of_publication";
|
90
|
private static final String volume_and_page = "volume_and_page";
|
91
|
private static final String KewYear4CDM = "KewYear4CDM";
|
92
|
private static final String PubTypeABSG = "PubTypeABSG";
|
93
|
private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
|
94
|
|
95
|
private static final Map<String, UUID> nameMap = new HashMap<>();
|
96
|
private static final Map<String, UUID> taxonMap = new HashMap<>();
|
97
|
|
98
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
99
|
CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
|
100
|
Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
|
101
|
SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
|
102
|
parenthetical_author, primary_author, publication_author, place_of_publication,
|
103
|
volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
|
104
|
});
|
105
|
|
106
|
private Reference sourceReference;
|
107
|
private Reference secReference;
|
108
|
|
109
|
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
110
|
|
111
|
// @Override
|
112
|
// protected String getWorksheetName(CONFIG config) {
|
113
|
// return "valid taxa names";
|
114
|
// }
|
115
|
|
116
|
@Override
|
117
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
118
|
|
119
|
String line = getLine(state, 50);
|
120
|
// System.out.println(line);
|
121
|
Map<String, String> record = state.getOriginalRecord();
|
122
|
|
123
|
Set<String> keys = record.keySet();
|
124
|
for (String key: keys) {
|
125
|
if (! expectedKeys.contains(key)){
|
126
|
logger.warn(line + "Unexpected Key: " + key);
|
127
|
}
|
128
|
}
|
129
|
|
130
|
makeTaxon(state, line, record);
|
131
|
}
|
132
|
|
133
|
private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
|
134
|
// state.getTransactionStatus().flush();
|
135
|
Reference sec = getSecReference(state, record);
|
136
|
|
137
|
//name
|
138
|
TaxonName existingName = getExistingName(state, line);
|
139
|
if (existingName != null){
|
140
|
verifyName(state, existingName, record, line, false);
|
141
|
}else{
|
142
|
existingName = createName(state, line);
|
143
|
}
|
144
|
|
145
|
//taxon
|
146
|
TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
|
147
|
|
148
|
if (taxonBase != null){
|
149
|
getTaxonService().saveOrUpdate(taxonBase);
|
150
|
}
|
151
|
|
152
|
return;
|
153
|
}
|
154
|
|
155
|
private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
156
|
//parse
|
157
|
String fullTitle = getValue(state, Kew_Name_Citation);
|
158
|
String kewNameId = getValue(state, Kew_Name_ID);
|
159
|
|
160
|
// fullTitle = replaceBookSectionAuthor(state, fullTitle);
|
161
|
|
162
|
TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
|
163
|
handleBookSectionAuthor(newName, state, line);
|
164
|
|
165
|
putName(kewNameId, newName.getUuid(), line);
|
166
|
//name status
|
167
|
makeNameStatus(line, state.getOriginalRecord(), newName);
|
168
|
verifyName(state, newName, state.getOriginalRecord(), line, true);
|
169
|
//deduplication
|
170
|
replaceNameAuthorsAndReferences(state, newName);
|
171
|
newName.addSource(makeOriginalSource(state));
|
172
|
getNameService().saveOrUpdate(newName);
|
173
|
//Kew-Nomencl-Status
|
174
|
return newName;
|
175
|
}
|
176
|
|
177
|
private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
178
|
String type = getValue(state, PubTypeABSG);
|
179
|
if ("BS".equals(type)){
|
180
|
Reference book = newName.getNomenclaturalReference();
|
181
|
String pubAuthor = getValue(state, publication_author);
|
182
|
if (book != null && StringUtils.isNotEmpty(pubAuthor)){
|
183
|
TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
|
184
|
Reference bookSection = ReferenceFactory.newBookSection();
|
185
|
bookSection.setAuthorship(book.getAuthorship());
|
186
|
book.setAuthorship(bookAuthor);
|
187
|
bookSection.setInReference(book);
|
188
|
bookSection.setDatePublished(book.getDatePublished());
|
189
|
newName.setNomenclaturalReference(bookSection);
|
190
|
}else{
|
191
|
logger.warn(line + "unexpected booksection author handling");
|
192
|
}
|
193
|
}
|
194
|
}
|
195
|
|
196
|
private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
|
197
|
TeamOrPersonBase<?> result;
|
198
|
String ed = "";
|
199
|
if (pubAuthor.endsWith(" (ed.)")){
|
200
|
ed = " (ed.)";
|
201
|
}else if (pubAuthor.endsWith(" (eds.)")){
|
202
|
ed = " (eds.)";
|
203
|
}
|
204
|
pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
|
205
|
String[] splits = pubAuthor.split("(, | & )");
|
206
|
if (splits.length > 1){
|
207
|
Team team = Team.NewInstance();
|
208
|
result = team;
|
209
|
for (String split : splits){
|
210
|
if ("al.".equals(split.trim())){
|
211
|
team.setHasMoreMembers(true);
|
212
|
}else{
|
213
|
team.addTeamMember(getPerson(split, line));
|
214
|
}
|
215
|
}
|
216
|
}else{
|
217
|
result = getPerson(splits[0], line);
|
218
|
}
|
219
|
if (ed.length() > 0){
|
220
|
result.setTitleCache(result.getTitleCache() + ed, true);
|
221
|
}
|
222
|
return result;
|
223
|
}
|
224
|
|
225
|
private Person getPerson(String personStr, String line) {
|
226
|
Person result = Person.NewInstance();
|
227
|
String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
|
228
|
// regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
|
229
|
Matcher matcher = Pattern.compile(regEx).matcher(personStr);
|
230
|
if (matcher.matches()){
|
231
|
String famName = matcher.group("famname");
|
232
|
result.setFamilyName(famName);
|
233
|
String initials = personStr.replace(famName,"").trim();
|
234
|
result.setInitials(initials);
|
235
|
}else{
|
236
|
result.setTitleCache(personStr, true);
|
237
|
logger.warn(line + "BookSection author could not be parsed: " + personStr);
|
238
|
}
|
239
|
return result;
|
240
|
}
|
241
|
|
242
|
private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
|
243
|
String type = getValue(state, PubTypeABSG);
|
244
|
if ("BS".equals(type)){
|
245
|
String pubAuthor = getValue(state, publication_author);
|
246
|
int inIndex = fullTitle.indexOf(" in ");
|
247
|
int commaIndex = fullTitle.indexOf(", ");
|
248
|
|
249
|
}
|
250
|
return fullTitle;
|
251
|
}
|
252
|
|
253
|
private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
|
254
|
Map<String, String> record, String line, boolean isNew) {
|
255
|
if (isNew){
|
256
|
boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
|
257
|
if (!parsed){
|
258
|
return;
|
259
|
}
|
260
|
}
|
261
|
String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
|
262
|
verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
|
263
|
verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
|
264
|
verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
|
265
|
String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
|
266
|
verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
|
267
|
String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
|
268
|
verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
|
269
|
|
270
|
//reference
|
271
|
Reference nomRef = taxonName.getNomenclaturalReference();
|
272
|
if (nomRef == null){
|
273
|
logger.warn(line + "no nom.ref. exists in existing name");
|
274
|
}else{
|
275
|
|
276
|
//place of publication
|
277
|
boolean hasInRef = nomRef.getInReference() != null;
|
278
|
String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
|
279
|
nomRef.getInReference().getAbbrevTitle() :
|
280
|
nomRef.getAbbrevTitle();
|
281
|
String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
|
282
|
//author
|
283
|
String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
|
284
|
verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
|
285
|
//vol and page
|
286
|
String existingVolume = getVolume(nomRef);
|
287
|
String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
|
288
|
verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
|
289
|
//year
|
290
|
verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
|
291
|
//pub type
|
292
|
verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
|
293
|
}
|
294
|
}
|
295
|
|
296
|
private String getVolume(Reference nomRef) {
|
297
|
Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
|
298
|
String vol = ref.getVolume();
|
299
|
String edition = ref.getEdition();
|
300
|
if (StringUtils.isNotBlank(edition)){
|
301
|
edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
|
302
|
}
|
303
|
String series = ref.getSeriesPart();
|
304
|
if (StringUtils.isNotBlank(series)){
|
305
|
series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
|
306
|
}
|
307
|
|
308
|
return vol;
|
309
|
}
|
310
|
|
311
|
private boolean isNumber(String edition) {
|
312
|
try {
|
313
|
Integer.valueOf(edition);
|
314
|
} catch (NumberFormatException e) {
|
315
|
return false;
|
316
|
}
|
317
|
return true;
|
318
|
}
|
319
|
|
320
|
private String authorAndExAuthor(TeamOrPersonBase<?> author,
|
321
|
TeamOrPersonBase<?> exAuthor) {
|
322
|
return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
|
323
|
+ author.getNomenclaturalTitleCache();
|
324
|
}
|
325
|
|
326
|
private String replaceStatus(String fullTitleCache) {
|
327
|
return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
|
328
|
}
|
329
|
|
330
|
private String abbrefRefType(ReferenceType type) {
|
331
|
return type == ReferenceType.Article ? "A" :
|
332
|
type == ReferenceType.Book ? "B" :
|
333
|
type == ReferenceType.BookSection ? "BS" :
|
334
|
type == ReferenceType.Generic ? "GEN" :
|
335
|
type.getLabel() ;
|
336
|
}
|
337
|
|
338
|
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
|
339
|
return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
|
340
|
}
|
341
|
|
342
|
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
|
343
|
String noLogIf, String noLogIf2, boolean isNew) {
|
344
|
String value = getValue(record, fieldName);
|
345
|
if (!CdmUtils.nullSafeEqual(expectedValue, value)){
|
346
|
String diff = singleDiff(expectedValue, value);
|
347
|
String label = isNew ? "New " : "Existing";
|
348
|
if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
|
349
|
System.out.println(" " + line + fieldName + "\n "+label+": " + expectedValue + "\n Kew : " + value);
|
350
|
}
|
351
|
return diff;
|
352
|
}else{
|
353
|
return "";
|
354
|
}
|
355
|
}
|
356
|
|
357
|
private String singleDiff(String expectedValue, String value) {
|
358
|
if (expectedValue == null){
|
359
|
return CdmUtils.Nz(value);
|
360
|
}else if (value == null){
|
361
|
return CdmUtils.Nz(expectedValue);
|
362
|
}
|
363
|
expectedValue = expectedValue.trim();
|
364
|
value = value.trim();
|
365
|
String diff_ab = StringUtils.difference(expectedValue, value);
|
366
|
String diff_ba = StringUtils.difference(value, expectedValue);
|
367
|
if (diff_ab.endsWith(diff_ba)){
|
368
|
return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
|
369
|
}else if (diff_ba.endsWith(diff_ab)){
|
370
|
return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
|
371
|
}else{
|
372
|
return NO_SIMPLE_DIFF;
|
373
|
}
|
374
|
}
|
375
|
|
376
|
private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
377
|
String cdmNameUuid = getValue(state, CDM_Name_UUID);
|
378
|
String kewNameId = getValue(state, Kew_Name_ID);
|
379
|
if (cdmNameUuid == null){
|
380
|
return null;
|
381
|
}
|
382
|
TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
|
383
|
if (existingName != null){
|
384
|
putName(kewNameId, existingName.getUuid(), line);
|
385
|
return CdmBase.deproxy(existingName);
|
386
|
}else{
|
387
|
return null;
|
388
|
}
|
389
|
}
|
390
|
|
391
|
private void putName(String kewNameId, UUID uuid, String line) {
|
392
|
UUID existingUuid = nameMap.put(kewNameId, uuid);
|
393
|
if (existingUuid != null){
|
394
|
logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
|
395
|
}
|
396
|
}
|
397
|
|
398
|
|
399
|
private void makeNameStatus(String line, Map<String, String> record,
|
400
|
TaxonName taxonName) {
|
401
|
String nameStatus = getValue(record, Kew_Nomencl_Status);
|
402
|
NomenclaturalStatusType status;
|
403
|
if (isBlank(nameStatus)){
|
404
|
status = null;
|
405
|
}else if ("Illegitimate".equals(nameStatus)){
|
406
|
status = NomenclaturalStatusType.ILLEGITIMATE();
|
407
|
}else if ("Invalid".equals(nameStatus)){
|
408
|
status = NomenclaturalStatusType.INVALID();
|
409
|
}else{
|
410
|
logger.warn(line + "Nom. status not recognized: " + nameStatus);
|
411
|
status = null;
|
412
|
}
|
413
|
if (status != null){
|
414
|
taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
|
415
|
}
|
416
|
}
|
417
|
|
418
|
|
419
|
private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
420
|
Map<String, String> record, TaxonName taxonName, Reference sec) {
|
421
|
|
422
|
TaxonBase<?> taxonBase;
|
423
|
boolean isUnplaced = false;
|
424
|
String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
|
425
|
|
426
|
if ("Accepted".equals(taxStatusStr)){
|
427
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
428
|
}else if ("Synonym".equals(taxStatusStr)){
|
429
|
taxonBase = Synonym.NewInstance(taxonName, sec);
|
430
|
}else if ("Artificial Hybrid".equals(taxStatusStr)){
|
431
|
taxonBase = Synonym.NewInstance(taxonName, sec);
|
432
|
}else if ("Unplaced".equals(taxStatusStr)){
|
433
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
434
|
}else{
|
435
|
logger.warn(line + "Status not handled: " + taxStatusStr);
|
436
|
return null;
|
437
|
}
|
438
|
taxonBase.addSource(makeOriginalSource(state));
|
439
|
taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
|
440
|
if (taxonBase instanceof Taxon){
|
441
|
UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
|
442
|
if (existing == null || !isUnplaced){
|
443
|
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
444
|
}else if (!isUnplaced){
|
445
|
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
446
|
System.out.println(" " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
|
447
|
}
|
448
|
}
|
449
|
return taxonBase;
|
450
|
}
|
451
|
|
452
|
int c2 = 0;
|
453
|
@Override
|
454
|
protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
455
|
|
456
|
String kewId = getValue(state, Kew_Name_ID) + ": ";
|
457
|
String line = " (line: " + state.getCurrentLine() + ")";
|
458
|
// System.out.println(line);
|
459
|
if (c2++ % 100 == 0){
|
460
|
this.commitTransaction(state.getTransactionStatus());
|
461
|
this.classification = null;
|
462
|
this.secReference = null;
|
463
|
this.sourceReference = null;
|
464
|
this.orphanedSynonymTaxon = null;
|
465
|
TransactionStatus tx = this.startTransaction();
|
466
|
state.setTransactionStatus(tx);
|
467
|
logger.info(line + "New transaction started.");
|
468
|
}
|
469
|
Map<String, String> record = state.getOriginalRecord();
|
470
|
|
471
|
Classification classification = getClassification(state);
|
472
|
TaxonBase<?> taxonBase = getTaxon(record);
|
473
|
TaxonName taxonName = taxonBase.getName();
|
474
|
|
475
|
if (taxonBase.isInstanceOf(Taxon.class)){
|
476
|
Taxon parent = getParent(record, taxonName, line, kewId);
|
477
|
if (parent != null){
|
478
|
classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
|
479
|
}
|
480
|
}else if (taxonBase.isInstanceOf(Synonym.class)){
|
481
|
Taxon taxon = getAcceptedTaxon(record, line, kewId);
|
482
|
if (taxon == null){
|
483
|
taxon = getOrphanedSynonymTaxon(state);
|
484
|
logger.warn(kewId + "Accepted taxon not found. Added synonym to 'orphaned synonym taxon': " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
|
485
|
}
|
486
|
taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF);
|
487
|
}else{
|
488
|
logger.warn("Unhandled");
|
489
|
}
|
490
|
|
491
|
String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
|
492
|
if (basionymId != null){
|
493
|
UUID basionymUuid = nameMap.get(basionymId);
|
494
|
TaxonName basionym = getNameService().find(basionymUuid);
|
495
|
if(basionym == null){
|
496
|
logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
|
497
|
}else{
|
498
|
taxonName.addBasionym(basionym);
|
499
|
taxonName.mergeHomotypicGroups(basionym); //just in case this is not automatically done
|
500
|
//TODO
|
501
|
// adjustSynonymType(taxonBase, basionymTaxon, line);
|
502
|
}
|
503
|
}
|
504
|
|
505
|
}
|
506
|
|
507
|
Taxon orphanedSynonymTaxon;
|
508
|
private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
|
509
|
if (orphanedSynonymTaxon != null) {
|
510
|
return orphanedSynonymTaxon;
|
511
|
}
|
512
|
UUID orphanedTaxonUuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
|
513
|
orphanedSynonymTaxon = CdmBase.deproxy(getTaxonService().find(orphanedTaxonUuid), Taxon.class);
|
514
|
if (orphanedSynonymTaxon == null){
|
515
|
TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
|
516
|
placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
|
517
|
orphanedSynonymTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
|
518
|
orphanedSynonymTaxon.setUuid(orphanedTaxonUuid);
|
519
|
Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
520
|
TaxonNode orphandNode = getClassification(state).addParentChild(unplacedTaxon, orphanedSynonymTaxon, null, null);
|
521
|
getTaxonNodeService().save(orphandNode);
|
522
|
}
|
523
|
return orphanedSynonymTaxon;
|
524
|
}
|
525
|
|
526
|
private Classification classification;
|
527
|
private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
|
528
|
if (classification == null){
|
529
|
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
530
|
}
|
531
|
return classification;
|
532
|
}
|
533
|
|
534
|
private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
|
535
|
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
536
|
if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
|
537
|
String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
|
538
|
UUID accUuid = taxonMap.get(accKewId);
|
539
|
TaxonBase<?> accBase = getTaxonService().find(accUuid);
|
540
|
if (accBase == null){
|
541
|
logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
|
542
|
return null;
|
543
|
}else if (accBase.isInstanceOf(Synonym.class)){
|
544
|
logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
|
545
|
return null;
|
546
|
}else{
|
547
|
return CdmBase.deproxy(accBase, Taxon.class);
|
548
|
}
|
549
|
}else{
|
550
|
logger.warn(kewId + "Parent not retrieved" + line);
|
551
|
return null;
|
552
|
}
|
553
|
}
|
554
|
|
555
|
private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
|
556
|
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
557
|
if ("Unplaced".equals(statusStr)){
|
558
|
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
559
|
}else if ("Artificial Hybrid".equals(statusStr)){
|
560
|
return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
|
561
|
}else if ("Accepted".equals(statusStr)){
|
562
|
String higherName = getHigherRankName(taxonName);
|
563
|
UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
|
564
|
if (parentTaxonUuid != null){
|
565
|
TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
|
566
|
if (parentBase == null){
|
567
|
return null;
|
568
|
} else if (parentBase.isInstanceOf(Taxon.class)){
|
569
|
Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
|
570
|
return parentTaxon;
|
571
|
} else {
|
572
|
logger.warn(kewId + "Parent is synonym " + line);
|
573
|
return null;
|
574
|
}
|
575
|
}else{
|
576
|
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
|
577
|
}
|
578
|
}else if ("Synonym".equals(statusStr)){
|
579
|
//not relevant
|
580
|
return null;
|
581
|
}else{
|
582
|
logger.warn(kewId + "Parent not retrieved" + line);
|
583
|
return null;
|
584
|
}
|
585
|
}
|
586
|
|
587
|
private String getHigherRankName(TaxonName taxonName) {
|
588
|
if (Rank.SPECIES().equals(taxonName.getRank())){
|
589
|
return taxonName.getGenusOrUninomial();
|
590
|
}else if (taxonName.isInfraSpecific()){
|
591
|
return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
|
592
|
}
|
593
|
return null;
|
594
|
}
|
595
|
|
596
|
private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
|
597
|
adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
|
598
|
adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
|
599
|
}
|
600
|
|
601
|
private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
|
602
|
if (firstTaxon == null){
|
603
|
logger.warn(line + "first taxon is null for adjust synonym type");
|
604
|
}else if (secondTaxon == null){
|
605
|
logger.warn(line + "second taxon is null for adjust synonym type");
|
606
|
}else if (secondTaxon.isInstanceOf(Synonym.class)){
|
607
|
Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
|
608
|
if (firstTaxon.equals(syn.getAcceptedTaxon())){
|
609
|
syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF);
|
610
|
}
|
611
|
}
|
612
|
}
|
613
|
|
614
|
protected TaxonBase<?> getTaxon(Map<String, String> record) {
|
615
|
String kew_name_id = getValue(record, Kew_Name_ID);
|
616
|
UUID taxonUuid = taxonMap.get(kew_name_id);
|
617
|
TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
|
618
|
return taxon;
|
619
|
}
|
620
|
|
621
|
private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
|
622
|
boolean result = true;
|
623
|
if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
|
624
|
logger.warn(line + "Name could not be parsed: " + fullName);
|
625
|
result = false;
|
626
|
}
|
627
|
Reference nomRef = name.getNomenclaturalReference();
|
628
|
if (nomRef != null && (nomRef.isProtectedTitleCache()
|
629
|
|| nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
|
630
|
logger.warn(line + "Nom ref could not be parsed: " + fullName);
|
631
|
result = false;
|
632
|
}
|
633
|
if (nameStr != null && !name.getTitleCache().equals(nameStr)){
|
634
|
logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
|
635
|
result = false;
|
636
|
}
|
637
|
return result;
|
638
|
}
|
639
|
|
640
|
private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
|
641
|
if (this.secReference == null){
|
642
|
logger.warn("Load sec ref");
|
643
|
String secUuid = record.get(Sec_Ref_CDM_UUID);
|
644
|
secReference = getReferenceService().load(UUID.fromString(secUuid));
|
645
|
if (this.secReference == null){
|
646
|
logger.warn("Sec ref is null");
|
647
|
}
|
648
|
}
|
649
|
return this.secReference;
|
650
|
}
|
651
|
|
652
|
private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
|
653
|
if (this.sourceReference == null){
|
654
|
this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
|
655
|
}
|
656
|
return this.sourceReference;
|
657
|
}
|
658
|
|
659
|
private Reference getPersistentReference(Reference reference) {
|
660
|
Reference result = getReferenceService().find(reference.getUuid());
|
661
|
logger.warn("Loaded persistent reference: "+ reference.getUuid());
|
662
|
if (result == null){
|
663
|
logger.warn("Persistent reference is null: " + reference.getUuid());
|
664
|
result = reference;
|
665
|
getReferenceService().saveOrUpdate(result);
|
666
|
}
|
667
|
return result;
|
668
|
}
|
669
|
|
670
|
private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
|
671
|
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
|
672
|
}
|
673
|
|
674
|
|
675
|
@Override
|
676
|
protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
|
677
|
String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
|
678
|
return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
|
679
|
}
|
680
|
}
|