1
|
/**
|
2
|
* Copyright (C) 2016 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.caryo;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
import java.util.UUID;
|
17
|
import java.util.regex.Matcher;
|
18
|
import java.util.regex.Pattern;
|
19
|
|
20
|
import org.apache.commons.lang3.StringUtils;
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
import org.springframework.transaction.TransactionStatus;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
27
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
31
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
32
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
33
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
34
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
35
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
36
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
37
|
import eu.etaxonomy.cdm.model.name.Rank;
|
38
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
39
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
40
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
41
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
42
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
43
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
44
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
45
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
46
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
47
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
48
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
49
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
50
|
|
51
|
/**
|
52
|
* Kew excel taxon import for Caryophyllaceae.
|
53
|
*
|
54
|
* @author a.mueller
|
55
|
* @since 05.01.2022
|
56
|
*/
|
57
|
@Component
|
58
|
public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
|
59
|
extends SimpleExcelTaxonImport<CONFIG>{
|
60
|
|
61
|
private static final long serialVersionUID = 1081966876789613803L;
|
62
|
private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
|
63
|
|
64
|
private static final String NO_SIMPLE_DIFF = "xxxxx";
|
65
|
|
66
|
private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
|
67
|
private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
|
68
|
private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
|
69
|
|
70
|
private static final String CDM_Name_UUID = "CDM-Name_UUID";
|
71
|
private static final String Kew_Name_ID = "Kew-Name-ID";
|
72
|
private static final String Kew_Name_Citation = "Kew-Name-Citation";
|
73
|
private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
|
74
|
private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
|
75
|
private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
|
76
|
private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
|
77
|
private static final String GENUS_HYBRID = "genus_hybrid";
|
78
|
private static final String GENUS = "genus";
|
79
|
private static final String SPECIES_HYBRID = "species_hybrid";
|
80
|
private static final String SPECIES = "species";
|
81
|
|
82
|
private static final String infraspecific_rank = "infraspecific_rank";
|
83
|
private static final String infraspecies = "infraspecies";
|
84
|
|
85
|
private static final String parenthetical_author = "parenthetical_author";
|
86
|
private static final String primary_author = "primary_author";
|
87
|
private static final String publication_author = "publication_author";
|
88
|
private static final String place_of_publication = "place_of_publication";
|
89
|
private static final String volume_and_page = "volume_and_page";
|
90
|
private static final String KewYear4CDM = "KewYear4CDM";
|
91
|
private static final String PubTypeABSG = "PubTypeABSG";
|
92
|
private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
|
93
|
|
94
|
private static final Map<String, UUID> nameMap = new HashMap<>();
|
95
|
private static final Map<String, UUID> taxonMap = new HashMap<>();
|
96
|
|
97
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
98
|
CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
|
99
|
Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
|
100
|
SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
|
101
|
parenthetical_author, primary_author, publication_author, place_of_publication,
|
102
|
volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
|
103
|
});
|
104
|
|
105
|
private Reference sourceReference;
|
106
|
private Reference secReference;
|
107
|
|
108
|
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
109
|
|
110
|
// @Override
|
111
|
// protected String getWorksheetName(CONFIG config) {
|
112
|
// return "valid taxa names";
|
113
|
// }
|
114
|
|
115
|
@Override
|
116
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
117
|
|
118
|
String line = getLine(state, 50);
|
119
|
System.out.println(line);
|
120
|
Map<String, String> record = state.getOriginalRecord();
|
121
|
|
122
|
Set<String> keys = record.keySet();
|
123
|
for (String key: keys) {
|
124
|
if (! expectedKeys.contains(key)){
|
125
|
logger.warn(line + "Unexpected Key: " + key);
|
126
|
}
|
127
|
}
|
128
|
|
129
|
makeTaxon(state, line, record);
|
130
|
}
|
131
|
|
132
|
private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
|
133
|
// state.getTransactionStatus().flush();
|
134
|
Reference sec = getSecReference(state, record);
|
135
|
|
136
|
//name
|
137
|
TaxonName existingName = getExistingName(state, line);
|
138
|
if (existingName != null){
|
139
|
verifyName(state, existingName, record, line, false);
|
140
|
}else{
|
141
|
existingName = createName(state, line);
|
142
|
}
|
143
|
|
144
|
//taxon
|
145
|
TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
|
146
|
|
147
|
if (taxonBase != null){
|
148
|
getTaxonService().saveOrUpdate(taxonBase);
|
149
|
}
|
150
|
|
151
|
return;
|
152
|
}
|
153
|
|
154
|
private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
155
|
//parse
|
156
|
String fullTitle = getValue(state, Kew_Name_Citation);
|
157
|
String kewNameId = getValue(state, Kew_Name_ID);
|
158
|
|
159
|
fullTitle = replaceBookSectionAuthor(state, fullTitle);
|
160
|
|
161
|
TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
|
162
|
handleBookSectionAuthor(newName, state, line);
|
163
|
|
164
|
putName(kewNameId, newName.getUuid(), line);
|
165
|
//name status
|
166
|
makeNameStatus(line, state.getOriginalRecord(), newName);
|
167
|
verifyName(state, newName, state.getOriginalRecord(), line, true);
|
168
|
//deduplication
|
169
|
replaceNameAuthorsAndReferences(state, newName);
|
170
|
newName.addSource(makeOriginalSource(state));
|
171
|
getNameService().saveOrUpdate(newName);
|
172
|
//Kew-Nomencl-Status
|
173
|
return newName;
|
174
|
}
|
175
|
|
176
|
private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
177
|
String type = getValue(state, PubTypeABSG);
|
178
|
if ("BS".equals(type)){
|
179
|
Reference book = newName.getNomenclaturalReference();
|
180
|
String pubAuthor = getValue(state, publication_author);
|
181
|
if (book != null && StringUtils.isNotEmpty(pubAuthor)){
|
182
|
TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
|
183
|
Reference bookSection = ReferenceFactory.newBookSection();
|
184
|
bookSection.setAuthorship(book.getAuthorship());
|
185
|
book.setAuthorship(bookAuthor);
|
186
|
bookSection.setInReference(book);
|
187
|
bookSection.setDatePublished(book.getDatePublished());
|
188
|
newName.setNomenclaturalReference(bookSection);
|
189
|
}else{
|
190
|
logger.warn(line + "unexpected booksection author handling");
|
191
|
}
|
192
|
}
|
193
|
}
|
194
|
|
195
|
private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
|
196
|
TeamOrPersonBase<?> result;
|
197
|
String ed = "";
|
198
|
if (pubAuthor.endsWith(" (ed.)")){
|
199
|
ed = " (ed.)";
|
200
|
}else if (pubAuthor.endsWith(" (eds.)")){
|
201
|
ed = " (eds.)";
|
202
|
}
|
203
|
pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
|
204
|
String[] splits = pubAuthor.split("(, | & )");
|
205
|
if (splits.length > 1){
|
206
|
Team team = Team.NewInstance();
|
207
|
result = team;
|
208
|
for (String split : splits){
|
209
|
if ("al.".equals(split.trim())){
|
210
|
team.setHasMoreMembers(true);
|
211
|
}else{
|
212
|
team.addTeamMember(getPerson(split, line));
|
213
|
}
|
214
|
}
|
215
|
}else{
|
216
|
result = getPerson(splits[0], line);
|
217
|
}
|
218
|
if (ed.length() > 0){
|
219
|
result.setTitleCache(result.getTitleCache() + ed, true);
|
220
|
}
|
221
|
return result;
|
222
|
}
|
223
|
|
224
|
private Person getPerson(String personStr, String line) {
|
225
|
Person result = Person.NewInstance();
|
226
|
String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
|
227
|
// regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
|
228
|
Matcher matcher = Pattern.compile(regEx).matcher(personStr);
|
229
|
if (matcher.matches()){
|
230
|
String famName = matcher.group("famname");
|
231
|
result.setFamilyName(famName);
|
232
|
String initials = personStr.replace(famName,"").trim();
|
233
|
result.setInitials(initials);
|
234
|
}else{
|
235
|
result.setTitleCache(personStr, true);
|
236
|
logger.warn(line + "BookSection author could not be parsed: " + personStr);
|
237
|
}
|
238
|
return result;
|
239
|
}
|
240
|
|
241
|
private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
|
242
|
String type = getValue(state, PubTypeABSG);
|
243
|
if ("BS".equals(type)){
|
244
|
String pubAuthor = getValue(state, publication_author);
|
245
|
int inIndex = fullTitle.indexOf(" in ");
|
246
|
int commaIndex = fullTitle.indexOf(", ");
|
247
|
|
248
|
}
|
249
|
return fullTitle;
|
250
|
}
|
251
|
|
252
|
private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
|
253
|
Map<String, String> record, String line, boolean isNew) {
|
254
|
if (isNew){
|
255
|
boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
|
256
|
if (!parsed){
|
257
|
return;
|
258
|
}
|
259
|
}
|
260
|
String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
|
261
|
verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
|
262
|
verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
|
263
|
verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
|
264
|
String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
|
265
|
verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
|
266
|
String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
|
267
|
verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
|
268
|
|
269
|
//reference
|
270
|
Reference nomRef = taxonName.getNomenclaturalReference();
|
271
|
if (nomRef == null){
|
272
|
logger.warn(line + "no nom.ref. exists in existing name");
|
273
|
}else{
|
274
|
|
275
|
//place of publication
|
276
|
boolean hasInRef = nomRef.getInReference() != null;
|
277
|
String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
|
278
|
nomRef.getInReference().getAbbrevTitle() :
|
279
|
nomRef.getAbbrevTitle();
|
280
|
String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
|
281
|
//author
|
282
|
String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
|
283
|
verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
|
284
|
//vol and page
|
285
|
String existingVolume = getVolume(nomRef);
|
286
|
String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
|
287
|
verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
|
288
|
//year
|
289
|
verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
|
290
|
//pub type
|
291
|
verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
|
292
|
}
|
293
|
}
|
294
|
|
295
|
private String getVolume(Reference nomRef) {
|
296
|
Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
|
297
|
String vol = ref.getVolume();
|
298
|
String edition = ref.getEdition();
|
299
|
if (StringUtils.isNotBlank(edition)){
|
300
|
edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
|
301
|
}
|
302
|
String series = ref.getSeriesPart();
|
303
|
if (StringUtils.isNotBlank(series)){
|
304
|
series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
|
305
|
}
|
306
|
|
307
|
return vol;
|
308
|
}
|
309
|
|
310
|
private boolean isNumber(String edition) {
|
311
|
try {
|
312
|
Integer.valueOf(edition);
|
313
|
} catch (NumberFormatException e) {
|
314
|
return false;
|
315
|
}
|
316
|
return true;
|
317
|
}
|
318
|
|
319
|
private String authorAndExAuthor(TeamOrPersonBase<?> author,
|
320
|
TeamOrPersonBase<?> exAuthor) {
|
321
|
return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
|
322
|
+ author.getNomenclaturalTitleCache();
|
323
|
}
|
324
|
|
325
|
private String replaceStatus(String fullTitleCache) {
|
326
|
return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
|
327
|
}
|
328
|
|
329
|
private String abbrefRefType(ReferenceType type) {
|
330
|
return type == ReferenceType.Article ? "A" :
|
331
|
type == ReferenceType.Book ? "B" :
|
332
|
type == ReferenceType.BookSection ? "BS" :
|
333
|
type == ReferenceType.Generic ? "GEN" :
|
334
|
type.getLabel() ;
|
335
|
}
|
336
|
|
337
|
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
|
338
|
return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
|
339
|
}
|
340
|
|
341
|
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
|
342
|
String noLogIf, String noLogIf2, boolean isNew) {
|
343
|
String value = getValue(record, fieldName);
|
344
|
if (!CdmUtils.nullSafeEqual(expectedValue, value)){
|
345
|
String diff = singleDiff(expectedValue, value);
|
346
|
String label = isNew ? "New " : "Existing";
|
347
|
if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
|
348
|
System.out.println(" " + line + fieldName + "\n "+label+": " + expectedValue + "\n Kew : " + value);
|
349
|
}
|
350
|
return diff;
|
351
|
}else{
|
352
|
return "";
|
353
|
}
|
354
|
}
|
355
|
|
356
|
private String singleDiff(String expectedValue, String value) {
|
357
|
if (expectedValue == null){
|
358
|
return CdmUtils.Nz(value);
|
359
|
}else if (value == null){
|
360
|
return CdmUtils.Nz(expectedValue);
|
361
|
}
|
362
|
expectedValue = expectedValue.trim();
|
363
|
value = value.trim();
|
364
|
String diff_ab = StringUtils.difference(expectedValue, value);
|
365
|
String diff_ba = StringUtils.difference(value, expectedValue);
|
366
|
if (diff_ab.endsWith(diff_ba)){
|
367
|
return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
|
368
|
}else if (diff_ba.endsWith(diff_ab)){
|
369
|
return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
|
370
|
}else{
|
371
|
return NO_SIMPLE_DIFF;
|
372
|
}
|
373
|
}
|
374
|
|
375
|
private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
376
|
String cdmNameUuid = getValue(state, CDM_Name_UUID);
|
377
|
String kewNameId = getValue(state, Kew_Name_ID);
|
378
|
if (cdmNameUuid == null){
|
379
|
return null;
|
380
|
}
|
381
|
TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
|
382
|
if (existingName != null){
|
383
|
putName(kewNameId, existingName.getUuid(), line);
|
384
|
return CdmBase.deproxy(existingName);
|
385
|
}else{
|
386
|
return null;
|
387
|
}
|
388
|
}
|
389
|
|
390
|
private void putName(String kewNameId, UUID uuid, String line) {
|
391
|
UUID existingUuid = nameMap.put(kewNameId, uuid);
|
392
|
if (existingUuid != null){
|
393
|
logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
|
394
|
}
|
395
|
}
|
396
|
|
397
|
|
398
|
private void makeNameStatus(String line, Map<String, String> record,
|
399
|
TaxonName taxonName) {
|
400
|
String nameStatus = getValue(record, Kew_Nomencl_Status);
|
401
|
NomenclaturalStatusType status;
|
402
|
if (isBlank(nameStatus)){
|
403
|
status = null;
|
404
|
}else if ("Illegitimate".equals(nameStatus)){
|
405
|
status = NomenclaturalStatusType.ILLEGITIMATE();
|
406
|
}else if ("Invalid".equals(nameStatus)){
|
407
|
status = NomenclaturalStatusType.INVALID();
|
408
|
}else{
|
409
|
logger.warn(line + "Nom. status not recognized: " + nameStatus);
|
410
|
status = null;
|
411
|
}
|
412
|
if (status != null){
|
413
|
taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
|
414
|
}
|
415
|
}
|
416
|
|
417
|
|
418
|
private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
419
|
Map<String, String> record, TaxonName taxonName, Reference sec) {
|
420
|
|
421
|
TaxonBase<?> taxonBase;
|
422
|
boolean isUnplaced = false;
|
423
|
String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
|
424
|
|
425
|
if ("Accepted".equals(taxStatusStr)){
|
426
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
427
|
}else if ("Synonym".equals(taxStatusStr)){
|
428
|
taxonBase = Synonym.NewInstance(taxonName, sec);
|
429
|
}else if ("Artificial Hybrid".equals(taxStatusStr)){
|
430
|
taxonBase = Synonym.NewInstance(taxonName, sec);
|
431
|
}else if ("Unplaced".equals(taxStatusStr)){
|
432
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
433
|
}else{
|
434
|
logger.warn(line + "Status not handled: " + taxStatusStr);
|
435
|
return null;
|
436
|
}
|
437
|
taxonBase.addSource(makeOriginalSource(state));
|
438
|
taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
|
439
|
if (taxonBase instanceof Taxon){
|
440
|
UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
|
441
|
if (existing == null || !isUnplaced){
|
442
|
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
443
|
}else if (!isUnplaced){
|
444
|
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
445
|
System.out.println(" " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
|
446
|
}
|
447
|
}
|
448
|
return taxonBase;
|
449
|
}
|
450
|
|
451
|
int c2 = 0;
|
452
|
@Override
|
453
|
protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
454
|
|
455
|
String kewId = getValue(state, Kew_Name_ID) + ": ";
|
456
|
String line = " (line: " + state.getCurrentLine() + ")";
|
457
|
// System.out.println(line);
|
458
|
if (c2++ % 100 == 0){
|
459
|
this.commitTransaction(state.getTransactionStatus());
|
460
|
this.classification = null;
|
461
|
this.secReference = null;
|
462
|
this.sourceReference = null;
|
463
|
this.orphanedSynonymTaxon = null;
|
464
|
TransactionStatus tx = this.startTransaction();
|
465
|
state.setTransactionStatus(tx);
|
466
|
logger.info(line + "New transaction started.");
|
467
|
}
|
468
|
Map<String, String> record = state.getOriginalRecord();
|
469
|
|
470
|
Classification classification = getClassification(state);
|
471
|
TaxonBase<?> taxonBase = getTaxon(record);
|
472
|
TaxonName taxonName = taxonBase.getName();
|
473
|
|
474
|
if (taxonBase.isInstanceOf(Taxon.class)){
|
475
|
Taxon parent = getParent(record, taxonName, line, kewId);
|
476
|
if (parent != null){
|
477
|
classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
|
478
|
}
|
479
|
}else if (taxonBase.isInstanceOf(Synonym.class)){
|
480
|
Taxon taxon = getAcceptedTaxon(record, line, kewId);
|
481
|
if (taxon == null){
|
482
|
taxon = getOrphanedSynonymTaxon(state);
|
483
|
logger.warn(kewId + "Accepted taxon not found. Added synonym to 'orphaned synonym taxon': " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
|
484
|
}
|
485
|
taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
|
486
|
}else{
|
487
|
logger.warn("Unhandled");
|
488
|
}
|
489
|
|
490
|
String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
|
491
|
if (basionymId != null){
|
492
|
UUID basionymUuid = nameMap.get(basionymId);
|
493
|
TaxonName basionym = getNameService().find(basionymUuid);
|
494
|
if(basionym == null){
|
495
|
logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
|
496
|
}else{
|
497
|
taxonName.addBasionym(basionym);
|
498
|
taxonName.mergeHomotypicGroups(basionym); //just in case this is not automatically done
|
499
|
//TODO
|
500
|
// adjustSynonymType(taxonBase, basionymTaxon, line);
|
501
|
}
|
502
|
}
|
503
|
|
504
|
}
|
505
|
|
506
|
Taxon orphanedSynonymTaxon;
|
507
|
private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
|
508
|
if (orphanedSynonymTaxon != null) {
|
509
|
return orphanedSynonymTaxon;
|
510
|
}
|
511
|
UUID orphanedTaxonUuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
|
512
|
orphanedSynonymTaxon = CdmBase.deproxy(getTaxonService().find(orphanedTaxonUuid), Taxon.class);
|
513
|
if (orphanedSynonymTaxon == null){
|
514
|
TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
|
515
|
placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
|
516
|
orphanedSynonymTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
|
517
|
orphanedSynonymTaxon.setUuid(orphanedTaxonUuid);
|
518
|
Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
519
|
TaxonNode orphandNode = getClassification(state).addParentChild(unplacedTaxon, orphanedSynonymTaxon, null, null);
|
520
|
getTaxonNodeService().save(orphandNode);
|
521
|
}
|
522
|
return orphanedSynonymTaxon;
|
523
|
}
|
524
|
|
525
|
private Classification classification;
|
526
|
private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
|
527
|
if (classification == null){
|
528
|
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
529
|
}
|
530
|
return classification;
|
531
|
}
|
532
|
|
533
|
private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
|
534
|
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
535
|
if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
|
536
|
String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
|
537
|
UUID accUuid = taxonMap.get(accKewId);
|
538
|
TaxonBase<?> accBase = getTaxonService().find(accUuid);
|
539
|
if (accBase == null){
|
540
|
logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
|
541
|
return null;
|
542
|
}else if (accBase.isInstanceOf(Synonym.class)){
|
543
|
logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
|
544
|
return null;
|
545
|
}else{
|
546
|
return CdmBase.deproxy(accBase, Taxon.class);
|
547
|
}
|
548
|
}else{
|
549
|
logger.warn(kewId + "Parent not retrieved" + line);
|
550
|
return null;
|
551
|
}
|
552
|
}
|
553
|
|
554
|
private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
|
555
|
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
556
|
if ("Unplaced".equals(statusStr)){
|
557
|
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
558
|
}else if ("Artificial Hybrid".equals(statusStr)){
|
559
|
return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
|
560
|
}else if ("Accepted".equals(statusStr)){
|
561
|
String higherName = getHigherRankName(taxonName);
|
562
|
UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
|
563
|
if (parentTaxonUuid != null){
|
564
|
TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
|
565
|
if (parentBase == null){
|
566
|
return null;
|
567
|
} else if (parentBase.isInstanceOf(Taxon.class)){
|
568
|
Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
|
569
|
return parentTaxon;
|
570
|
} else {
|
571
|
logger.warn(kewId + "Parent is synonym " + line);
|
572
|
return null;
|
573
|
}
|
574
|
}else{
|
575
|
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
|
576
|
}
|
577
|
}else if ("Synonym".equals(statusStr)){
|
578
|
//not relevant
|
579
|
return null;
|
580
|
}else{
|
581
|
logger.warn(kewId + "Parent not retrieved" + line);
|
582
|
return null;
|
583
|
}
|
584
|
}
|
585
|
|
586
|
private String getHigherRankName(TaxonName taxonName) {
|
587
|
if (Rank.SPECIES().equals(taxonName.getRank())){
|
588
|
return taxonName.getGenusOrUninomial();
|
589
|
}else if (taxonName.isInfraSpecific()){
|
590
|
return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
|
591
|
}
|
592
|
return null;
|
593
|
}
|
594
|
|
595
|
private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
|
596
|
adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
|
597
|
adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
|
598
|
}
|
599
|
|
600
|
private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
|
601
|
if (firstTaxon == null){
|
602
|
logger.warn(line + "first taxon is null for adjust synonym type");
|
603
|
}else if (secondTaxon == null){
|
604
|
logger.warn(line + "second taxon is null for adjust synonym type");
|
605
|
}else if (secondTaxon.isInstanceOf(Synonym.class)){
|
606
|
Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
|
607
|
if (firstTaxon.equals(syn.getAcceptedTaxon())){
|
608
|
syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
|
609
|
}
|
610
|
}
|
611
|
}
|
612
|
|
613
|
protected TaxonBase<?> getTaxon(Map<String, String> record) {
|
614
|
String kew_name_id = getValue(record, Kew_Name_ID);
|
615
|
UUID taxonUuid = taxonMap.get(kew_name_id);
|
616
|
TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
|
617
|
return taxon;
|
618
|
}
|
619
|
|
620
|
private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
|
621
|
boolean result = true;
|
622
|
if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
|
623
|
logger.warn(line + "Name could not be parsed: " + fullName);
|
624
|
result = false;
|
625
|
}
|
626
|
Reference nomRef = name.getNomenclaturalReference();
|
627
|
if (nomRef != null && (nomRef.isProtectedTitleCache()
|
628
|
|| nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
|
629
|
logger.warn(line + "Nom ref could not be parsed: " + fullName);
|
630
|
result = false;
|
631
|
}
|
632
|
if (nameStr != null && !name.getTitleCache().equals(nameStr)){
|
633
|
logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
|
634
|
result = false;
|
635
|
}
|
636
|
return result;
|
637
|
}
|
638
|
|
639
|
private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
|
640
|
if (this.secReference == null){
|
641
|
logger.warn("Load sec ref");
|
642
|
String secUuid = record.get(Sec_Ref_CDM_UUID);
|
643
|
secReference = getReferenceService().load(UUID.fromString(secUuid));
|
644
|
if (this.secReference == null){
|
645
|
logger.warn("Sec ref is null");
|
646
|
}
|
647
|
}
|
648
|
return this.secReference;
|
649
|
}
|
650
|
|
651
|
private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
|
652
|
if (this.sourceReference == null){
|
653
|
this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
|
654
|
}
|
655
|
return this.sourceReference;
|
656
|
}
|
657
|
|
658
|
private Reference getPersistentReference(Reference reference) {
|
659
|
Reference result = getReferenceService().find(reference.getUuid());
|
660
|
logger.warn("Loaded persistent reference: "+ reference.getUuid());
|
661
|
if (result == null){
|
662
|
logger.warn("Persistent reference is null: " + reference.getUuid());
|
663
|
result = reference;
|
664
|
getReferenceService().saveOrUpdate(result);
|
665
|
}
|
666
|
return result;
|
667
|
}
|
668
|
|
669
|
private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
|
670
|
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
|
671
|
}
|
672
|
|
673
|
|
674
|
@Override
|
675
|
protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
|
676
|
String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
|
677
|
return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
|
678
|
}
|
679
|
}
|