|
1 |
/**
|
|
2 |
* Copyright (C) 2016 EDIT
|
|
3 |
* European Distributed Institute of Taxonomy
|
|
4 |
* http://www.e-taxonomy.eu
|
|
5 |
*
|
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms.
|
|
8 |
*/
|
|
9 |
package eu.etaxonomy.cdm.io.caryo;
|
|
10 |
|
|
11 |
import java.util.Arrays;
|
|
12 |
import java.util.HashMap;
|
|
13 |
import java.util.List;
|
|
14 |
import java.util.Map;
|
|
15 |
import java.util.Set;
|
|
16 |
import java.util.UUID;
|
|
17 |
import java.util.regex.Matcher;
|
|
18 |
import java.util.regex.Pattern;
|
|
19 |
|
|
20 |
import org.apache.commons.lang3.StringUtils;
|
|
21 |
import org.apache.log4j.Logger;
|
|
22 |
import org.springframework.stereotype.Component;
|
|
23 |
import org.springframework.transaction.TransactionStatus;
|
|
24 |
|
|
25 |
import eu.etaxonomy.cdm.common.CdmUtils;
|
|
26 |
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
|
27 |
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
|
28 |
import eu.etaxonomy.cdm.model.agent.Person;
|
|
29 |
import eu.etaxonomy.cdm.model.agent.Team;
|
|
30 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
|
31 |
import eu.etaxonomy.cdm.model.common.CdmBase;
|
|
32 |
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
|
33 |
import eu.etaxonomy.cdm.model.name.INonViralName;
|
|
34 |
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
|
35 |
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
|
36 |
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
|
37 |
import eu.etaxonomy.cdm.model.name.Rank;
|
|
38 |
import eu.etaxonomy.cdm.model.name.TaxonName;
|
|
39 |
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
|
40 |
import eu.etaxonomy.cdm.model.reference.Reference;
|
|
41 |
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
|
42 |
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
|
43 |
import eu.etaxonomy.cdm.model.taxon.Classification;
|
|
44 |
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
|
45 |
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
|
46 |
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
|
47 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
|
48 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
|
49 |
|
|
50 |
/**
|
|
51 |
* Kew excel taxon import for Caryophyllaceae.
|
|
52 |
*
|
|
53 |
* @author a.mueller
|
|
54 |
* @since 05.01.2022
|
|
55 |
*/
|
|
56 |
@Component
|
|
57 |
public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
|
|
58 |
extends SimpleExcelTaxonImport<CONFIG>{
|
|
59 |
|
|
60 |
private static final long serialVersionUID = 1081966876789613803L;
|
|
61 |
private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
|
|
62 |
|
|
63 |
private static final String NO_SIMPLE_DIFF = "xxxxx";
|
|
64 |
|
|
65 |
private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
|
|
66 |
private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
|
|
67 |
private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
|
|
68 |
|
|
69 |
private static final String CDM_Name_UUID = "CDM-Name_UUID";
|
|
70 |
private static final String Kew_Name_ID = "Kew-Name-ID";
|
|
71 |
private static final String Kew_Name_Citation = "Kew-Name-Citation";
|
|
72 |
private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
|
|
73 |
private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
|
|
74 |
private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
|
|
75 |
private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
|
|
76 |
private static final String GENUS_HYBRID = "genus_hybrid";
|
|
77 |
private static final String GENUS = "genus";
|
|
78 |
private static final String SPECIES_HYBRID = "species_hybrid";
|
|
79 |
private static final String SPECIES = "species";
|
|
80 |
|
|
81 |
private static final String infraspecific_rank = "infraspecific_rank";
|
|
82 |
private static final String infraspecies = "infraspecies";
|
|
83 |
|
|
84 |
private static final String parenthetical_author = "parenthetical_author";
|
|
85 |
private static final String primary_author = "primary_author";
|
|
86 |
private static final String publication_author = "publication_author";
|
|
87 |
private static final String place_of_publication = "place_of_publication";
|
|
88 |
private static final String volume_and_page = "volume_and_page";
|
|
89 |
private static final String KewYear4CDM = "KewYear4CDM";
|
|
90 |
private static final String PubTypeABSG = "PubTypeABSG";
|
|
91 |
private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
|
|
92 |
|
|
93 |
private static final Map<String, UUID> nameMap = new HashMap<>();
|
|
94 |
private static final Map<String, UUID> taxonMap = new HashMap<>();
|
|
95 |
|
|
96 |
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
|
97 |
CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
|
|
98 |
Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
|
|
99 |
SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
|
|
100 |
parenthetical_author, primary_author, publication_author, place_of_publication,
|
|
101 |
volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
|
|
102 |
});
|
|
103 |
|
|
104 |
private Reference sourceReference;
|
|
105 |
private Reference secReference;
|
|
106 |
|
|
107 |
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
|
108 |
|
|
109 |
// @Override
|
|
110 |
// protected String getWorksheetName(CONFIG config) {
|
|
111 |
// return "valid taxa names";
|
|
112 |
// }
|
|
113 |
|
|
114 |
@Override
|
|
115 |
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
116 |
|
|
117 |
String line = getLine(state, 50);
|
|
118 |
System.out.println(line);
|
|
119 |
Map<String, String> record = state.getOriginalRecord();
|
|
120 |
|
|
121 |
Set<String> keys = record.keySet();
|
|
122 |
for (String key: keys) {
|
|
123 |
if (! expectedKeys.contains(key)){
|
|
124 |
logger.warn(line + "Unexpected Key: " + key);
|
|
125 |
}
|
|
126 |
}
|
|
127 |
|
|
128 |
makeTaxon(state, line, record);
|
|
129 |
}
|
|
130 |
|
|
131 |
private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
|
|
132 |
// state.getTransactionStatus().flush();
|
|
133 |
Reference sec = getSecReference(state, record);
|
|
134 |
|
|
135 |
//name
|
|
136 |
TaxonName existingName = getExistingName(state, line);
|
|
137 |
if (existingName != null){
|
|
138 |
verifyName(state, existingName, record, line, false);
|
|
139 |
}else{
|
|
140 |
existingName = createName(state, line);
|
|
141 |
}
|
|
142 |
|
|
143 |
//taxon
|
|
144 |
TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
|
|
145 |
|
|
146 |
if (taxonBase != null){
|
|
147 |
getTaxonService().saveOrUpdate(taxonBase);
|
|
148 |
}
|
|
149 |
|
|
150 |
return;
|
|
151 |
}
|
|
152 |
|
|
153 |
private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
|
154 |
//parse
|
|
155 |
String fullTitle = getValue(state, Kew_Name_Citation);
|
|
156 |
String kewNameId = getValue(state, Kew_Name_ID);
|
|
157 |
|
|
158 |
fullTitle = replaceBookSectionAuthor(state, fullTitle);
|
|
159 |
|
|
160 |
TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
|
|
161 |
handleBookSectionAuthor(newName, state, line);
|
|
162 |
|
|
163 |
putName(kewNameId, newName.getUuid(), line);
|
|
164 |
//name status
|
|
165 |
makeNameStatus(line, state.getOriginalRecord(), newName);
|
|
166 |
verifyName(state, newName, state.getOriginalRecord(), line, true);
|
|
167 |
//deduplication
|
|
168 |
replaceNameAuthorsAndReferences(state, newName);
|
|
169 |
newName.addSource(makeOriginalSource(state));
|
|
170 |
getNameService().saveOrUpdate(newName);
|
|
171 |
//Kew-Nomencl-Status
|
|
172 |
return newName;
|
|
173 |
}
|
|
174 |
|
|
175 |
private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
|
176 |
String type = getValue(state, PubTypeABSG);
|
|
177 |
if ("BS".equals(type)){
|
|
178 |
Reference book = newName.getNomenclaturalReference();
|
|
179 |
String pubAuthor = getValue(state, publication_author);
|
|
180 |
if (book != null && StringUtils.isNotEmpty(pubAuthor)){
|
|
181 |
TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
|
|
182 |
Reference bookSection = ReferenceFactory.newBookSection();
|
|
183 |
bookSection.setAuthorship(book.getAuthorship());
|
|
184 |
book.setAuthorship(bookAuthor);
|
|
185 |
bookSection.setInReference(book);
|
|
186 |
bookSection.setDatePublished(book.getDatePublished());
|
|
187 |
newName.setNomenclaturalReference(bookSection);
|
|
188 |
}else{
|
|
189 |
logger.warn(line + "unexpected booksection author handling");
|
|
190 |
}
|
|
191 |
}
|
|
192 |
}
|
|
193 |
|
|
194 |
private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
|
|
195 |
TeamOrPersonBase<?> result;
|
|
196 |
String ed = "";
|
|
197 |
if (pubAuthor.endsWith(" (ed.)")){
|
|
198 |
ed = " (ed.)";
|
|
199 |
}else if (pubAuthor.endsWith(" (eds.)")){
|
|
200 |
ed = " (eds.)";
|
|
201 |
}
|
|
202 |
pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
|
|
203 |
String[] splits = pubAuthor.split("(, | & )");
|
|
204 |
if (splits.length > 1){
|
|
205 |
Team team = Team.NewInstance();
|
|
206 |
result = team;
|
|
207 |
for (String split : splits){
|
|
208 |
if ("al.".equals(split.trim())){
|
|
209 |
team.setHasMoreMembers(true);
|
|
210 |
}else{
|
|
211 |
team.addTeamMember(getPerson(split, line));
|
|
212 |
}
|
|
213 |
}
|
|
214 |
}else{
|
|
215 |
result = getPerson(splits[0], line);
|
|
216 |
}
|
|
217 |
if (ed.length() > 0){
|
|
218 |
result.setTitleCache(result.getTitleCache() + ed, true);
|
|
219 |
}
|
|
220 |
return result;
|
|
221 |
}
|
|
222 |
|
|
223 |
private Person getPerson(String personStr, String line) {
|
|
224 |
Person result = Person.NewInstance();
|
|
225 |
String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
|
|
226 |
// regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
|
|
227 |
Matcher matcher = Pattern.compile(regEx).matcher(personStr);
|
|
228 |
if (matcher.matches()){
|
|
229 |
String famName = matcher.group("famname");
|
|
230 |
result.setFamilyName(famName);
|
|
231 |
String initials = personStr.replace(famName,"").trim();
|
|
232 |
result.setInitials(initials);
|
|
233 |
}else{
|
|
234 |
result.setTitleCache(personStr, true);
|
|
235 |
logger.warn(line + "BookSection author could not be parsed: " + personStr);
|
|
236 |
}
|
|
237 |
return result;
|
|
238 |
}
|
|
239 |
|
|
240 |
private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
|
|
241 |
String type = getValue(state, PubTypeABSG);
|
|
242 |
if ("BS".equals(type)){
|
|
243 |
String pubAuthor = getValue(state, publication_author);
|
|
244 |
int inIndex = fullTitle.indexOf(" in ");
|
|
245 |
int commaIndex = fullTitle.indexOf(", ");
|
|
246 |
|
|
247 |
}
|
|
248 |
return fullTitle;
|
|
249 |
}
|
|
250 |
|
|
251 |
private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
|
|
252 |
Map<String, String> record, String line, boolean isNew) {
|
|
253 |
if (isNew){
|
|
254 |
boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
|
|
255 |
if (!parsed){
|
|
256 |
return;
|
|
257 |
}
|
|
258 |
}
|
|
259 |
String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
|
|
260 |
verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
|
|
261 |
verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
|
|
262 |
verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
|
|
263 |
String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
|
|
264 |
verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
|
|
265 |
String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
|
|
266 |
verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
|
|
267 |
|
|
268 |
//reference
|
|
269 |
Reference nomRef = taxonName.getNomenclaturalReference();
|
|
270 |
if (nomRef == null){
|
|
271 |
logger.warn(line + "no nom.ref. exists in existing name");
|
|
272 |
}else{
|
|
273 |
|
|
274 |
//place of publication
|
|
275 |
boolean hasInRef = nomRef.getInReference() != null;
|
|
276 |
String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
|
|
277 |
nomRef.getInReference().getAbbrevTitle() :
|
|
278 |
nomRef.getAbbrevTitle();
|
|
279 |
String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
|
|
280 |
//author
|
|
281 |
String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
|
|
282 |
verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
|
|
283 |
//vol and page
|
|
284 |
String existingVolume = getVolume(nomRef);
|
|
285 |
String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
|
|
286 |
verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
|
|
287 |
//year
|
|
288 |
verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
|
|
289 |
//pub type
|
|
290 |
verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
|
|
291 |
}
|
|
292 |
}
|
|
293 |
|
|
294 |
private String getVolume(Reference nomRef) {
|
|
295 |
Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
|
|
296 |
String vol = ref.getVolume();
|
|
297 |
String edition = ref.getEdition();
|
|
298 |
if (StringUtils.isNotBlank(edition)){
|
|
299 |
edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
|
|
300 |
}
|
|
301 |
String series = ref.getSeriesPart();
|
|
302 |
if (StringUtils.isNotBlank(series)){
|
|
303 |
series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
|
|
304 |
}
|
|
305 |
|
|
306 |
return vol;
|
|
307 |
}
|
|
308 |
|
|
309 |
private boolean isNumber(String edition) {
|
|
310 |
try {
|
|
311 |
Integer.valueOf(edition);
|
|
312 |
} catch (NumberFormatException e) {
|
|
313 |
return false;
|
|
314 |
}
|
|
315 |
return true;
|
|
316 |
}
|
|
317 |
|
|
318 |
private String authorAndExAuthor(TeamOrPersonBase<?> author,
|
|
319 |
TeamOrPersonBase<?> exAuthor) {
|
|
320 |
return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
|
|
321 |
+ author.getNomenclaturalTitleCache();
|
|
322 |
}
|
|
323 |
|
|
324 |
private String replaceStatus(String fullTitleCache) {
|
|
325 |
return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
|
|
326 |
}
|
|
327 |
|
|
328 |
private String abbrefRefType(ReferenceType type) {
|
|
329 |
return type == ReferenceType.Article ? "A" :
|
|
330 |
type == ReferenceType.Book ? "B" :
|
|
331 |
type == ReferenceType.BookSection ? "BS" :
|
|
332 |
type == ReferenceType.Generic ? "GEN" :
|
|
333 |
type.getLabel() ;
|
|
334 |
}
|
|
335 |
|
|
336 |
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
|
|
337 |
return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
|
|
338 |
}
|
|
339 |
|
|
340 |
private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
|
|
341 |
String noLogIf, String noLogIf2, boolean isNew) {
|
|
342 |
String value = getValue(record, fieldName);
|
|
343 |
if (!CdmUtils.nullSafeEqual(expectedValue, value)){
|
|
344 |
String diff = singleDiff(expectedValue, value);
|
|
345 |
String label = isNew ? "New " : "Existing";
|
|
346 |
if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
|
|
347 |
System.out.println(" " + line + fieldName + "\n "+label+": " + expectedValue + "\n Kew : " + value);
|
|
348 |
}
|
|
349 |
return diff;
|
|
350 |
}else{
|
|
351 |
return "";
|
|
352 |
}
|
|
353 |
}
|
|
354 |
|
|
355 |
private String singleDiff(String expectedValue, String value) {
|
|
356 |
if (expectedValue == null){
|
|
357 |
return CdmUtils.Nz(value);
|
|
358 |
}else if (value == null){
|
|
359 |
return CdmUtils.Nz(expectedValue);
|
|
360 |
}
|
|
361 |
expectedValue = expectedValue.trim();
|
|
362 |
value = value.trim();
|
|
363 |
String diff_ab = StringUtils.difference(expectedValue, value);
|
|
364 |
String diff_ba = StringUtils.difference(value, expectedValue);
|
|
365 |
if (diff_ab.endsWith(diff_ba)){
|
|
366 |
return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
|
|
367 |
}else if (diff_ba.endsWith(diff_ab)){
|
|
368 |
return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
|
|
369 |
}else{
|
|
370 |
return NO_SIMPLE_DIFF;
|
|
371 |
}
|
|
372 |
}
|
|
373 |
|
|
374 |
private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
|
375 |
String cdmNameUuid = getValue(state, CDM_Name_UUID);
|
|
376 |
String kewNameId = getValue(state, Kew_Name_ID);
|
|
377 |
if (cdmNameUuid == null){
|
|
378 |
return null;
|
|
379 |
}
|
|
380 |
TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
|
|
381 |
if (existingName != null){
|
|
382 |
putName(kewNameId, existingName.getUuid(), line);
|
|
383 |
return CdmBase.deproxy(existingName);
|
|
384 |
}else{
|
|
385 |
return null;
|
|
386 |
}
|
|
387 |
}
|
|
388 |
|
|
389 |
private void putName(String kewNameId, UUID uuid, String line) {
|
|
390 |
UUID existingUuid = nameMap.put(kewNameId, uuid);
|
|
391 |
if (existingUuid != null){
|
|
392 |
logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
|
|
393 |
}
|
|
394 |
}
|
|
395 |
|
|
396 |
|
|
397 |
private void makeNameStatus(String line, Map<String, String> record,
|
|
398 |
TaxonName taxonName) {
|
|
399 |
String nameStatus = getValue(record, Kew_Nomencl_Status);
|
|
400 |
NomenclaturalStatusType status;
|
|
401 |
if (isBlank(nameStatus)){
|
|
402 |
status = null;
|
|
403 |
}else if ("Illegitimate".equals(nameStatus)){
|
|
404 |
status = NomenclaturalStatusType.ILLEGITIMATE();
|
|
405 |
}else if ("Invalid".equals(nameStatus)){
|
|
406 |
status = NomenclaturalStatusType.INVALID();
|
|
407 |
}else{
|
|
408 |
logger.warn(line + "Nom. status not recognized: " + nameStatus);
|
|
409 |
status = null;
|
|
410 |
}
|
|
411 |
if (status != null){
|
|
412 |
taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
|
|
413 |
}
|
|
414 |
}
|
|
415 |
|
|
416 |
|
|
417 |
private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
|
418 |
Map<String, String> record, TaxonName taxonName, Reference sec) {
|
|
419 |
|
|
420 |
TaxonBase<?> taxonBase;
|
|
421 |
boolean isUnplaced = false;
|
|
422 |
String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
|
|
423 |
|
|
424 |
if ("Accepted".equals(taxStatusStr)){
|
|
425 |
taxonBase = Taxon.NewInstance(taxonName, sec);
|
|
426 |
}else if ("Synonym".equals(taxStatusStr)){
|
|
427 |
taxonBase = Synonym.NewInstance(taxonName, sec);
|
|
428 |
}else if ("Artificial Hybrid".equals(taxStatusStr)){
|
|
429 |
taxonBase = Synonym.NewInstance(taxonName, sec);
|
|
430 |
}else if ("Unplaced".equals(taxStatusStr)){
|
|
431 |
taxonBase = Taxon.NewInstance(taxonName, sec);
|
|
432 |
}else{
|
|
433 |
logger.warn(line + "Status not handled: " + taxStatusStr);
|
|
434 |
return null;
|
|
435 |
}
|
|
436 |
taxonBase.addSource(makeOriginalSource(state));
|
|
437 |
taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
|
|
438 |
if (taxonBase instanceof Taxon){
|
|
439 |
UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
|
|
440 |
if (existing == null || !isUnplaced){
|
|
441 |
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
|
442 |
}else if (!isUnplaced){
|
|
443 |
taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
|
|
444 |
System.out.println(" " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
|
|
445 |
}
|
|
446 |
}
|
|
447 |
return taxonBase;
|
|
448 |
}
|
|
449 |
|
|
450 |
int c2 = 0;
|
|
451 |
@Override
|
|
452 |
protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
453 |
|
|
454 |
String kewId = getValue(state, Kew_Name_ID) + ": ";
|
|
455 |
String line = " (line: " + state.getCurrentLine() + ")";
|
|
456 |
// System.out.println(line);
|
|
457 |
if (c2++ % 100 == 0){
|
|
458 |
this.commitTransaction(state.getTransactionStatus());
|
|
459 |
this.classification = null;
|
|
460 |
this.secReference = null;
|
|
461 |
this.sourceReference = null;
|
|
462 |
TransactionStatus tx = this.startTransaction();
|
|
463 |
state.setTransactionStatus(tx);
|
|
464 |
logger.info(line + "New transaction started.");
|
|
465 |
}
|
|
466 |
Map<String, String> record = state.getOriginalRecord();
|
|
467 |
|
|
468 |
Classification classification = getClassification(state);
|
|
469 |
TaxonBase<?> taxonBase = getTaxon(record);
|
|
470 |
TaxonName taxonName = taxonBase.getName();
|
|
471 |
|
|
472 |
if (taxonBase.isInstanceOf(Taxon.class)){
|
|
473 |
Taxon parent = getParent(record, taxonName, line, kewId);
|
|
474 |
if (parent != null){
|
|
475 |
classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
|
|
476 |
}
|
|
477 |
}else if (taxonBase.isInstanceOf(Synonym.class)){
|
|
478 |
Taxon taxon = getAcceptedTaxon(record, line, kewId);
|
|
479 |
if (taxon == null){
|
|
480 |
logger.warn(kewId + "Accepted taxon not found: " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
|
|
481 |
taxon = getOrphanedSynonymTaxon(state);
|
|
482 |
}else{
|
|
483 |
taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
|
|
484 |
}
|
|
485 |
}else{
|
|
486 |
logger.warn("Unhandled");
|
|
487 |
}
|
|
488 |
|
|
489 |
String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
|
|
490 |
if (basionymId != null){
|
|
491 |
UUID basionymUuid = nameMap.get(basionymId);
|
|
492 |
TaxonName basionym = getNameService().find(basionymUuid);
|
|
493 |
if(basionym == null){
|
|
494 |
logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
|
|
495 |
}else{
|
|
496 |
taxonName.addBasionym(basionym);
|
|
497 |
taxonName.mergeHomotypicGroups(basionym); //just in case this is not automatically done
|
|
498 |
//TODO
|
|
499 |
// adjustSynonymType(taxonBase, basionymTaxon, line);
|
|
500 |
}
|
|
501 |
}
|
|
502 |
|
|
503 |
}
|
|
504 |
|
|
505 |
private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
506 |
UUID uuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
|
|
507 |
Taxon placeholderTaxon = CdmBase.deproxy(getTaxonService().find(uuid), Taxon.class);
|
|
508 |
if (placeholderTaxon == null){
|
|
509 |
TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
|
|
510 |
placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
|
|
511 |
placeholderTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
|
|
512 |
Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
|
513 |
getClassification(state).addParentChild(unplacedTaxon, placeholderTaxon, null, null);
|
|
514 |
}
|
|
515 |
return placeholderTaxon;
|
|
516 |
}
|
|
517 |
|
|
518 |
private Classification classification;
|
|
519 |
private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
520 |
if (classification == null){
|
|
521 |
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
|
522 |
}
|
|
523 |
return classification;
|
|
524 |
}
|
|
525 |
|
|
526 |
private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
|
|
527 |
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
|
528 |
if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
|
|
529 |
String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
|
|
530 |
UUID accUuid = taxonMap.get(accKewId);
|
|
531 |
TaxonBase<?> accBase = getTaxonService().find(accUuid);
|
|
532 |
if (accBase == null){
|
|
533 |
logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
|
|
534 |
return null;
|
|
535 |
}else if (accBase.isInstanceOf(Synonym.class)){
|
|
536 |
logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
|
|
537 |
return null;
|
|
538 |
}else{
|
|
539 |
return CdmBase.deproxy(accBase, Taxon.class);
|
|
540 |
}
|
|
541 |
}else{
|
|
542 |
logger.warn(kewId + "Parent not retrieved" + line);
|
|
543 |
return null;
|
|
544 |
}
|
|
545 |
}
|
|
546 |
|
|
547 |
private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
|
|
548 |
String statusStr = getValue(record, Kew_Taxonomic_Status);
|
|
549 |
if ("Unplaced".equals(statusStr)){
|
|
550 |
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
|
|
551 |
}else if ("Artificial Hybrid".equals(statusStr)){
|
|
552 |
return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
|
|
553 |
}else if ("Accepted".equals(statusStr)){
|
|
554 |
String higherName = getHigherRankName(taxonName);
|
|
555 |
UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
|
|
556 |
if (parentTaxonUuid != null){
|
|
557 |
TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
|
|
558 |
if (parentBase == null){
|
|
559 |
return null;
|
|
560 |
} else if (parentBase.isInstanceOf(Taxon.class)){
|
|
561 |
Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
|
|
562 |
return parentTaxon;
|
|
563 |
} else {
|
|
564 |
logger.warn(kewId + "Parent is synonym " + line);
|
|
565 |
return null;
|
|
566 |
}
|
|
567 |
}else{
|
|
568 |
return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
|
|
569 |
}
|
|
570 |
}else if ("Synonym".equals(statusStr)){
|
|
571 |
//not relevant
|
|
572 |
return null;
|
|
573 |
}else{
|
|
574 |
logger.warn(kewId + "Parent not retrieved" + line);
|
|
575 |
return null;
|
|
576 |
}
|
|
577 |
}
|
|
578 |
|
|
579 |
private String getHigherRankName(TaxonName taxonName) {
|
|
580 |
if (Rank.SPECIES().equals(taxonName.getRank())){
|
|
581 |
return taxonName.getGenusOrUninomial();
|
|
582 |
}else if (taxonName.isInfraSpecific()){
|
|
583 |
return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
|
|
584 |
}
|
|
585 |
return null;
|
|
586 |
}
|
|
587 |
|
|
588 |
private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
|
|
589 |
adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
|
|
590 |
adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
|
|
591 |
}
|
|
592 |
|
|
593 |
private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
|
|
594 |
if (firstTaxon == null){
|
|
595 |
logger.warn(line + "first taxon is null for adjust synonym type");
|
|
596 |
}else if (secondTaxon == null){
|
|
597 |
logger.warn(line + "second taxon is null for adjust synonym type");
|
|
598 |
}else if (secondTaxon.isInstanceOf(Synonym.class)){
|
|
599 |
Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
|
|
600 |
if (firstTaxon.equals(syn.getAcceptedTaxon())){
|
|
601 |
syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
|
|
602 |
}
|
|
603 |
}
|
|
604 |
}
|
|
605 |
|
|
606 |
protected TaxonBase<?> getTaxon(Map<String, String> record) {
|
|
607 |
String kew_name_id = getValue(record, Kew_Name_ID);
|
|
608 |
UUID taxonUuid = taxonMap.get(kew_name_id);
|
|
609 |
TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
|
|
610 |
return taxon;
|
|
611 |
}
|
|
612 |
|
|
613 |
private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
|
|
614 |
boolean result = true;
|
|
615 |
if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
|
|
616 |
logger.warn(line + "Name could not be parsed: " + fullName);
|
|
617 |
result = false;
|
|
618 |
}
|
|
619 |
Reference nomRef = name.getNomenclaturalReference();
|
|
620 |
if (nomRef != null && (nomRef.isProtectedTitleCache()
|
|
621 |
|| nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
|
|
622 |
logger.warn(line + "Nom ref could not be parsed: " + fullName);
|
|
623 |
result = false;
|
|
624 |
}
|
|
625 |
if (nameStr != null && !name.getTitleCache().equals(nameStr)){
|
|
626 |
logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
|
|
627 |
result = false;
|
|
628 |
}
|
|
629 |
return result;
|
|
630 |
}
|
|
631 |
|
|
632 |
private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
|
|
633 |
if (this.secReference == null){
|
|
634 |
logger.warn("Load sec ref");
|
|
635 |
String secUuid = record.get(Sec_Ref_CDM_UUID);
|
|
636 |
secReference = getReferenceService().load(UUID.fromString(secUuid));
|
|
637 |
if (this.secReference == null){
|
|
638 |
logger.warn("Sec ref is null");
|
|
639 |
}
|
|
640 |
}
|
|
641 |
return this.secReference;
|
|
642 |
}
|
|
643 |
|
|
644 |
private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
645 |
if (this.sourceReference == null){
|
|
646 |
this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
|
|
647 |
}
|
|
648 |
return this.sourceReference;
|
|
649 |
}
|
|
650 |
|
|
651 |
private Reference getPersistentReference(Reference reference) {
|
|
652 |
Reference result = getReferenceService().find(reference.getUuid());
|
|
653 |
logger.warn("Loaded persistent reference: "+ reference.getUuid());
|
|
654 |
if (result == null){
|
|
655 |
logger.warn("Persistent reference is null: " + reference.getUuid());
|
|
656 |
result = reference;
|
|
657 |
getReferenceService().saveOrUpdate(result);
|
|
658 |
}
|
|
659 |
return result;
|
|
660 |
}
|
|
661 |
|
|
662 |
private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
|
|
663 |
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
|
|
664 |
}
|
|
665 |
|
|
666 |
|
|
667 |
@Override
|
|
668 |
protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
|
|
669 |
String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
|
|
670 |
return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
|
|
671 |
}
|
|
672 |
}
|
ref #9918 first version of Caryophyllaceae import