ref #9918 first version of caryophyllaceae import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / caryo / KewExcelTaxonImport.java
1 /**
2 * Copyright (C) 2016 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.caryo;
10
11 import java.util.Arrays;
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.UUID;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19
20 import org.apache.commons.lang3.StringUtils;
21 import org.apache.log4j.Logger;
22 import org.springframework.stereotype.Component;
23 import org.springframework.transaction.TransactionStatus;
24
25 import eu.etaxonomy.cdm.common.CdmUtils;
26 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
27 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
28 import eu.etaxonomy.cdm.model.agent.Person;
29 import eu.etaxonomy.cdm.model.agent.Team;
30 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31 import eu.etaxonomy.cdm.model.common.CdmBase;
32 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
33 import eu.etaxonomy.cdm.model.name.INonViralName;
34 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
35 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
37 import eu.etaxonomy.cdm.model.name.Rank;
38 import eu.etaxonomy.cdm.model.name.TaxonName;
39 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
40 import eu.etaxonomy.cdm.model.reference.Reference;
41 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
42 import eu.etaxonomy.cdm.model.reference.ReferenceType;
43 import eu.etaxonomy.cdm.model.taxon.Classification;
44 import eu.etaxonomy.cdm.model.taxon.Synonym;
45 import eu.etaxonomy.cdm.model.taxon.SynonymType;
46 import eu.etaxonomy.cdm.model.taxon.Taxon;
47 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
48 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
49
50 /**
51 * Kew excel taxon import for Caryophyllaceae.
52 *
53 * @author a.mueller
54 * @since 05.01.2022
55 */
56 @Component
57 public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
58 extends SimpleExcelTaxonImport<CONFIG>{
59
60 private static final long serialVersionUID = 1081966876789613803L;
61 private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
62
63 private static final String NO_SIMPLE_DIFF = "xxxxx";
64
65 private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
66 private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
67 private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
68
69 private static final String CDM_Name_UUID = "CDM-Name_UUID";
70 private static final String Kew_Name_ID = "Kew-Name-ID";
71 private static final String Kew_Name_Citation = "Kew-Name-Citation";
72 private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
73 private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
74 private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
75 private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
76 private static final String GENUS_HYBRID = "genus_hybrid";
77 private static final String GENUS = "genus";
78 private static final String SPECIES_HYBRID = "species_hybrid";
79 private static final String SPECIES = "species";
80
81 private static final String infraspecific_rank = "infraspecific_rank";
82 private static final String infraspecies = "infraspecies";
83
84 private static final String parenthetical_author = "parenthetical_author";
85 private static final String primary_author = "primary_author";
86 private static final String publication_author = "publication_author";
87 private static final String place_of_publication = "place_of_publication";
88 private static final String volume_and_page = "volume_and_page";
89 private static final String KewYear4CDM = "KewYear4CDM";
90 private static final String PubTypeABSG = "PubTypeABSG";
91 private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
92
93 private static final Map<String, UUID> nameMap = new HashMap<>();
94 private static final Map<String, UUID> taxonMap = new HashMap<>();
95
96 private static List<String> expectedKeys= Arrays.asList(new String[]{
97 CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
98 Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
99 SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
100 parenthetical_author, primary_author, publication_author, place_of_publication,
101 volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
102 });
103
104 private Reference sourceReference;
105 private Reference secReference;
106
107 private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
108
109 // @Override
110 // protected String getWorksheetName(CONFIG config) {
111 // return "valid taxa names";
112 // }
113
114 @Override
115 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
116
117 String line = getLine(state, 50);
118 System.out.println(line);
119 Map<String, String> record = state.getOriginalRecord();
120
121 Set<String> keys = record.keySet();
122 for (String key: keys) {
123 if (! expectedKeys.contains(key)){
124 logger.warn(line + "Unexpected Key: " + key);
125 }
126 }
127
128 makeTaxon(state, line, record);
129 }
130
131 private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
132 // state.getTransactionStatus().flush();
133 Reference sec = getSecReference(state, record);
134
135 //name
136 TaxonName existingName = getExistingName(state, line);
137 if (existingName != null){
138 verifyName(state, existingName, record, line, false);
139 }else{
140 existingName = createName(state, line);
141 }
142
143 //taxon
144 TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
145
146 if (taxonBase != null){
147 getTaxonService().saveOrUpdate(taxonBase);
148 }
149
150 return;
151 }
152
153 private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
154 //parse
155 String fullTitle = getValue(state, Kew_Name_Citation);
156 String kewNameId = getValue(state, Kew_Name_ID);
157
158 fullTitle = replaceBookSectionAuthor(state, fullTitle);
159
160 TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
161 handleBookSectionAuthor(newName, state, line);
162
163 putName(kewNameId, newName.getUuid(), line);
164 //name status
165 makeNameStatus(line, state.getOriginalRecord(), newName);
166 verifyName(state, newName, state.getOriginalRecord(), line, true);
167 //deduplication
168 replaceNameAuthorsAndReferences(state, newName);
169 newName.addSource(makeOriginalSource(state));
170 getNameService().saveOrUpdate(newName);
171 //Kew-Nomencl-Status
172 return newName;
173 }
174
175 private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
176 String type = getValue(state, PubTypeABSG);
177 if ("BS".equals(type)){
178 Reference book = newName.getNomenclaturalReference();
179 String pubAuthor = getValue(state, publication_author);
180 if (book != null && StringUtils.isNotEmpty(pubAuthor)){
181 TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
182 Reference bookSection = ReferenceFactory.newBookSection();
183 bookSection.setAuthorship(book.getAuthorship());
184 book.setAuthorship(bookAuthor);
185 bookSection.setInReference(book);
186 bookSection.setDatePublished(book.getDatePublished());
187 newName.setNomenclaturalReference(bookSection);
188 }else{
189 logger.warn(line + "unexpected booksection author handling");
190 }
191 }
192 }
193
194 private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
195 TeamOrPersonBase<?> result;
196 String ed = "";
197 if (pubAuthor.endsWith(" (ed.)")){
198 ed = " (ed.)";
199 }else if (pubAuthor.endsWith(" (eds.)")){
200 ed = " (eds.)";
201 }
202 pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
203 String[] splits = pubAuthor.split("(, | & )");
204 if (splits.length > 1){
205 Team team = Team.NewInstance();
206 result = team;
207 for (String split : splits){
208 if ("al.".equals(split.trim())){
209 team.setHasMoreMembers(true);
210 }else{
211 team.addTeamMember(getPerson(split, line));
212 }
213 }
214 }else{
215 result = getPerson(splits[0], line);
216 }
217 if (ed.length() > 0){
218 result.setTitleCache(result.getTitleCache() + ed, true);
219 }
220 return result;
221 }
222
223 private Person getPerson(String personStr, String line) {
224 Person result = Person.NewInstance();
225 String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
226 // regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
227 Matcher matcher = Pattern.compile(regEx).matcher(personStr);
228 if (matcher.matches()){
229 String famName = matcher.group("famname");
230 result.setFamilyName(famName);
231 String initials = personStr.replace(famName,"").trim();
232 result.setInitials(initials);
233 }else{
234 result.setTitleCache(personStr, true);
235 logger.warn(line + "BookSection author could not be parsed: " + personStr);
236 }
237 return result;
238 }
239
240 private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
241 String type = getValue(state, PubTypeABSG);
242 if ("BS".equals(type)){
243 String pubAuthor = getValue(state, publication_author);
244 int inIndex = fullTitle.indexOf(" in ");
245 int commaIndex = fullTitle.indexOf(", ");
246
247 }
248 return fullTitle;
249 }
250
251 private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
252 Map<String, String> record, String line, boolean isNew) {
253 if (isNew){
254 boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
255 if (!parsed){
256 return;
257 }
258 }
259 String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
260 verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
261 verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
262 verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
263 String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
264 verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
265 String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
266 verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
267
268 //reference
269 Reference nomRef = taxonName.getNomenclaturalReference();
270 if (nomRef == null){
271 logger.warn(line + "no nom.ref. exists in existing name");
272 }else{
273
274 //place of publication
275 boolean hasInRef = nomRef.getInReference() != null;
276 String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
277 nomRef.getInReference().getAbbrevTitle() :
278 nomRef.getAbbrevTitle();
279 String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
280 //author
281 String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
282 verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
283 //vol and page
284 String existingVolume = getVolume(nomRef);
285 String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
286 verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
287 //year
288 verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
289 //pub type
290 verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
291 }
292 }
293
294 private String getVolume(Reference nomRef) {
295 Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
296 String vol = ref.getVolume();
297 String edition = ref.getEdition();
298 if (StringUtils.isNotBlank(edition)){
299 edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
300 }
301 String series = ref.getSeriesPart();
302 if (StringUtils.isNotBlank(series)){
303 series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
304 }
305
306 return vol;
307 }
308
309 private boolean isNumber(String edition) {
310 try {
311 Integer.valueOf(edition);
312 } catch (NumberFormatException e) {
313 return false;
314 }
315 return true;
316 }
317
318 private String authorAndExAuthor(TeamOrPersonBase<?> author,
319 TeamOrPersonBase<?> exAuthor) {
320 return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
321 + author.getNomenclaturalTitleCache();
322 }
323
324 private String replaceStatus(String fullTitleCache) {
325 return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
326 }
327
328 private String abbrefRefType(ReferenceType type) {
329 return type == ReferenceType.Article ? "A" :
330 type == ReferenceType.Book ? "B" :
331 type == ReferenceType.BookSection ? "BS" :
332 type == ReferenceType.Generic ? "GEN" :
333 type.getLabel() ;
334 }
335
336 private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
337 return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
338 }
339
340 private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
341 String noLogIf, String noLogIf2, boolean isNew) {
342 String value = getValue(record, fieldName);
343 if (!CdmUtils.nullSafeEqual(expectedValue, value)){
344 String diff = singleDiff(expectedValue, value);
345 String label = isNew ? "New " : "Existing";
346 if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
347 System.out.println(" " + line + fieldName + "\n "+label+": " + expectedValue + "\n Kew : " + value);
348 }
349 return diff;
350 }else{
351 return "";
352 }
353 }
354
355 private String singleDiff(String expectedValue, String value) {
356 if (expectedValue == null){
357 return CdmUtils.Nz(value);
358 }else if (value == null){
359 return CdmUtils.Nz(expectedValue);
360 }
361 expectedValue = expectedValue.trim();
362 value = value.trim();
363 String diff_ab = StringUtils.difference(expectedValue, value);
364 String diff_ba = StringUtils.difference(value, expectedValue);
365 if (diff_ab.endsWith(diff_ba)){
366 return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
367 }else if (diff_ba.endsWith(diff_ab)){
368 return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
369 }else{
370 return NO_SIMPLE_DIFF;
371 }
372 }
373
374 private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
375 String cdmNameUuid = getValue(state, CDM_Name_UUID);
376 String kewNameId = getValue(state, Kew_Name_ID);
377 if (cdmNameUuid == null){
378 return null;
379 }
380 TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
381 if (existingName != null){
382 putName(kewNameId, existingName.getUuid(), line);
383 return CdmBase.deproxy(existingName);
384 }else{
385 return null;
386 }
387 }
388
389 private void putName(String kewNameId, UUID uuid, String line) {
390 UUID existingUuid = nameMap.put(kewNameId, uuid);
391 if (existingUuid != null){
392 logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
393 }
394 }
395
396
397 private void makeNameStatus(String line, Map<String, String> record,
398 TaxonName taxonName) {
399 String nameStatus = getValue(record, Kew_Nomencl_Status);
400 NomenclaturalStatusType status;
401 if (isBlank(nameStatus)){
402 status = null;
403 }else if ("Illegitimate".equals(nameStatus)){
404 status = NomenclaturalStatusType.ILLEGITIMATE();
405 }else if ("Invalid".equals(nameStatus)){
406 status = NomenclaturalStatusType.INVALID();
407 }else{
408 logger.warn(line + "Nom. status not recognized: " + nameStatus);
409 status = null;
410 }
411 if (status != null){
412 taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
413 }
414 }
415
416
417 private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
418 Map<String, String> record, TaxonName taxonName, Reference sec) {
419
420 TaxonBase<?> taxonBase;
421 boolean isUnplaced = false;
422 String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
423
424 if ("Accepted".equals(taxStatusStr)){
425 taxonBase = Taxon.NewInstance(taxonName, sec);
426 }else if ("Synonym".equals(taxStatusStr)){
427 taxonBase = Synonym.NewInstance(taxonName, sec);
428 }else if ("Artificial Hybrid".equals(taxStatusStr)){
429 taxonBase = Synonym.NewInstance(taxonName, sec);
430 }else if ("Unplaced".equals(taxStatusStr)){
431 taxonBase = Taxon.NewInstance(taxonName, sec);
432 }else{
433 logger.warn(line + "Status not handled: " + taxStatusStr);
434 return null;
435 }
436 taxonBase.addSource(makeOriginalSource(state));
437 taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
438 if (taxonBase instanceof Taxon){
439 UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
440 if (existing == null || !isUnplaced){
441 taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
442 }else if (!isUnplaced){
443 taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
444 System.out.println(" " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
445 }
446 }
447 return taxonBase;
448 }
449
450 int c2 = 0;
451 @Override
452 protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
453
454 String kewId = getValue(state, Kew_Name_ID) + ": ";
455 String line = " (line: " + state.getCurrentLine() + ")";
456 // System.out.println(line);
457 if (c2++ % 100 == 0){
458 this.commitTransaction(state.getTransactionStatus());
459 this.classification = null;
460 this.secReference = null;
461 this.sourceReference = null;
462 TransactionStatus tx = this.startTransaction();
463 state.setTransactionStatus(tx);
464 logger.info(line + "New transaction started.");
465 }
466 Map<String, String> record = state.getOriginalRecord();
467
468 Classification classification = getClassification(state);
469 TaxonBase<?> taxonBase = getTaxon(record);
470 TaxonName taxonName = taxonBase.getName();
471
472 if (taxonBase.isInstanceOf(Taxon.class)){
473 Taxon parent = getParent(record, taxonName, line, kewId);
474 if (parent != null){
475 classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
476 }
477 }else if (taxonBase.isInstanceOf(Synonym.class)){
478 Taxon taxon = getAcceptedTaxon(record, line, kewId);
479 if (taxon == null){
480 logger.warn(kewId + "Accepted taxon not found: " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
481 taxon = getOrphanedSynonymTaxon(state);
482 }else{
483 taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
484 }
485 }else{
486 logger.warn("Unhandled");
487 }
488
489 String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
490 if (basionymId != null){
491 UUID basionymUuid = nameMap.get(basionymId);
492 TaxonName basionym = getNameService().find(basionymUuid);
493 if(basionym == null){
494 logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
495 }else{
496 taxonName.addBasionym(basionym);
497 taxonName.mergeHomotypicGroups(basionym); //just in case this is not automatically done
498 //TODO
499 // adjustSynonymType(taxonBase, basionymTaxon, line);
500 }
501 }
502
503 }
504
505 private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
506 UUID uuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
507 Taxon placeholderTaxon = CdmBase.deproxy(getTaxonService().find(uuid), Taxon.class);
508 if (placeholderTaxon == null){
509 TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
510 placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
511 placeholderTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
512 Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
513 getClassification(state).addParentChild(unplacedTaxon, placeholderTaxon, null, null);
514 }
515 return placeholderTaxon;
516 }
517
518 private Classification classification;
519 private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
520 if (classification == null){
521 classification = getClassificationService().find(state.getConfig().getClassificationUuid());
522 }
523 return classification;
524 }
525
526 private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
527 String statusStr = getValue(record, Kew_Taxonomic_Status);
528 if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
529 String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
530 UUID accUuid = taxonMap.get(accKewId);
531 TaxonBase<?> accBase = getTaxonService().find(accUuid);
532 if (accBase == null){
533 logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
534 return null;
535 }else if (accBase.isInstanceOf(Synonym.class)){
536 logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
537 return null;
538 }else{
539 return CdmBase.deproxy(accBase, Taxon.class);
540 }
541 }else{
542 logger.warn(kewId + "Parent not retrieved" + line);
543 return null;
544 }
545 }
546
547 private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
548 String statusStr = getValue(record, Kew_Taxonomic_Status);
549 if ("Unplaced".equals(statusStr)){
550 return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
551 }else if ("Artificial Hybrid".equals(statusStr)){
552 return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
553 }else if ("Accepted".equals(statusStr)){
554 String higherName = getHigherRankName(taxonName);
555 UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
556 if (parentTaxonUuid != null){
557 TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
558 if (parentBase == null){
559 return null;
560 } else if (parentBase.isInstanceOf(Taxon.class)){
561 Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
562 return parentTaxon;
563 } else {
564 logger.warn(kewId + "Parent is synonym " + line);
565 return null;
566 }
567 }else{
568 return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
569 }
570 }else if ("Synonym".equals(statusStr)){
571 //not relevant
572 return null;
573 }else{
574 logger.warn(kewId + "Parent not retrieved" + line);
575 return null;
576 }
577 }
578
579 private String getHigherRankName(TaxonName taxonName) {
580 if (Rank.SPECIES().equals(taxonName.getRank())){
581 return taxonName.getGenusOrUninomial();
582 }else if (taxonName.isInfraSpecific()){
583 return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
584 }
585 return null;
586 }
587
588 private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
589 adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
590 adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
591 }
592
593 private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
594 if (firstTaxon == null){
595 logger.warn(line + "first taxon is null for adjust synonym type");
596 }else if (secondTaxon == null){
597 logger.warn(line + "second taxon is null for adjust synonym type");
598 }else if (secondTaxon.isInstanceOf(Synonym.class)){
599 Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
600 if (firstTaxon.equals(syn.getAcceptedTaxon())){
601 syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
602 }
603 }
604 }
605
606 protected TaxonBase<?> getTaxon(Map<String, String> record) {
607 String kew_name_id = getValue(record, Kew_Name_ID);
608 UUID taxonUuid = taxonMap.get(kew_name_id);
609 TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
610 return taxon;
611 }
612
613 private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
614 boolean result = true;
615 if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
616 logger.warn(line + "Name could not be parsed: " + fullName);
617 result = false;
618 }
619 Reference nomRef = name.getNomenclaturalReference();
620 if (nomRef != null && (nomRef.isProtectedTitleCache()
621 || nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
622 logger.warn(line + "Nom ref could not be parsed: " + fullName);
623 result = false;
624 }
625 if (nameStr != null && !name.getTitleCache().equals(nameStr)){
626 logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
627 result = false;
628 }
629 return result;
630 }
631
632 private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
633 if (this.secReference == null){
634 logger.warn("Load sec ref");
635 String secUuid = record.get(Sec_Ref_CDM_UUID);
636 secReference = getReferenceService().load(UUID.fromString(secUuid));
637 if (this.secReference == null){
638 logger.warn("Sec ref is null");
639 }
640 }
641 return this.secReference;
642 }
643
644 private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
645 if (this.sourceReference == null){
646 this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
647 }
648 return this.sourceReference;
649 }
650
651 private Reference getPersistentReference(Reference reference) {
652 Reference result = getReferenceService().find(reference.getUuid());
653 logger.warn("Loaded persistent reference: "+ reference.getUuid());
654 if (result == null){
655 logger.warn("Persistent reference is null: " + reference.getUuid());
656 result = reference;
657 getReferenceService().saveOrUpdate(result);
658 }
659 return result;
660 }
661
662 private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
663 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
664 }
665
666
667 @Override
668 protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
669 String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
670 return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
671 }
672 }