From a881d4d7c3f5f4340420416bd79c781723606014 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andreas=20M=C3=BCller?= Date: Sat, 15 Jan 2022 00:56:15 +0100 Subject: [PATCH] ref #9918 first version of caryophyllaceae import --- .../CaryophyllaceaeActivator.java | 91 +++ .../cdm/app/common/CdmDestinations.java | 2 +- .../cdm/io/caryo/KewExcelTaxonImport.java | 672 ++++++++++++++++++ .../KewExcelTaxonImportConfigurator.java | 64 ++ 4 files changed, 828 insertions(+), 1 deletion(-) create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java create mode 100644 app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java new file mode 100644 index 00000000..30384761 --- /dev/null +++ b/app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java @@ -0,0 +1,91 @@ +/** +* Copyright (C) 2007 EDIT +* European Distributed Institute of Taxonomy +* http://www.e-taxonomy.eu +* +* The contents of this file are subject to the Mozilla Public License Version 1.1 +* See LICENSE.TXT at the top of this package for the full license terms. 
+*/
+package eu.etaxonomy.cdm.app.caryophyllales;
+
+import java.util.UUID;
+
+import org.apache.log4j.Logger;
+
+import eu.etaxonomy.cdm.app.berlinModelImport.SourceBase;
+import eu.etaxonomy.cdm.app.common.CdmDestinations;
+import eu.etaxonomy.cdm.common.URI;
+import eu.etaxonomy.cdm.database.DbSchemaValidation;
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.caryo.KewExcelTaxonImportConfigurator;
+import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
+import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
+import eu.etaxonomy.cdm.model.reference.Reference;
+import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
+
+/**
+ * Activator (command line entry point) that configures and starts the
+ * Kew excel taxon import for Caryophyllaceae.
+ *
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+public class CaryophyllaceaeActivator extends SourceBase{
+
+    @SuppressWarnings("unused")
+    private static final Logger logger = Logger.getLogger(CaryophyllaceaeActivator.class);
+
+    /**
+     * Name of the excel file to import. It is also used as title of the source reference.
+     * Being a compile time constant it is safe to use it from {@link #caryophyllaceae()}
+     * during static initialization of {@link #source}.
+     */
+    private static final String SOURCE_FILE_NAME = "WCVP2CDM-Caryophyllaceae.xlsx";
+
+    //database validation status (create, update, validate ...)
+    static final DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
+    static final URI source = caryophyllaceae();
+
+//    static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
+    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_caryo_spp();
+
+    //classification
+    static final UUID classificationUuid = UUID.fromString("9edc58b5-de3b-43aa-9f31-1ede7c009c2b");
+
+    //check - import
+    static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
+
+    //taxa; NOTE: currently not passed to the configurator
+    static final boolean doTaxa = true;
+
+    /**
+     * Creates the import configuration from the constants of this class and
+     * invokes the import.
+     *
+     * @param cdmDestination the CDM data source to import into
+     */
+    private void doImport(ICdmDataSource cdmDestination){
+
+        //make Source
+        KewExcelTaxonImportConfigurator config = KewExcelTaxonImportConfigurator.NewInstance(source, cdmDestination);
+        config.setClassificationUuid(classificationUuid);
+        config.setCheck(check);
+        config.setDbSchemaValidation(hbm2dll);
+        config.setSourceReferenceTitle(SOURCE_FILE_NAME);
+
+        CdmDefaultImport<KewExcelTaxonImportConfigurator> myImport = new CdmDefaultImport<>();
+
+        System.out.println("Start import from ("+ source + ") ...");
+        config.setSourceReference(getSourceReference(config.getSourceReferenceTitle()));
+        myImport.invoke(config);
+        System.out.println("End import from ("+ source + ")...");
+    }
+
+    /**
+     * Creates a new generic reference with the given string as protected title cache.
+     *
+     * @param string the title of the reference
+     * @return the new, not yet persisted reference
+     */
+    private Reference getSourceReference(String string) {
+        Reference result = ReferenceFactory.newGeneric();
+        result.setTitleCache(string, true);
+        return result;
+    }
+
+    /**
+     * @return the URI of the Caryophyllaceae excel file on the file server
+     */
+    public static URI caryophyllaceae(){
+        return URI.create("file:////BGBM-PESIHPC/Caryophyllales/" + SOURCE_FILE_NAME);
+    }
+
+    /**
+     * Runs the import against {@link #cdmDestination} and exits the JVM afterwards.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        CaryophyllaceaeActivator me = new CaryophyllaceaeActivator();
+        me.doImport(cdmDestination);
+        System.exit(0);
+    }
+}
\ No newline at end of file
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
index 82d6de0f..607d5537 100644
--- a/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
@@ -590,7 +590,7 @@ public class CdmDestinations {
     public static ICdmDataSource cdm_local_caryo_spp(){
         DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
         String cdmServer = "127.0.0.1";
-        String cdmDB = "cdm_caryo_spp";
+        String cdmDB = "cdm_local_caryophyllales_spp";
         String cdmUserName = "edit";
         return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
     }
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java
new file mode 100644
index 00000000..1638ed67
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java
@@ -0,0 +1,672 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The
contents of this file are subject to the Mozilla Public License Version 1.1 +* See LICENSE.TXT at the top of this package for the full license terms. +*/ +package eu.etaxonomy.cdm.io.caryo; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; +import org.springframework.transaction.TransactionStatus; + +import eu.etaxonomy.cdm.common.CdmUtils; +import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport; +import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState; +import eu.etaxonomy.cdm.model.agent.Person; +import eu.etaxonomy.cdm.model.agent.Team; +import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; +import eu.etaxonomy.cdm.model.common.CdmBase; +import eu.etaxonomy.cdm.model.common.IdentifiableSource; +import eu.etaxonomy.cdm.model.name.INonViralName; +import eu.etaxonomy.cdm.model.name.NomenclaturalCode; +import eu.etaxonomy.cdm.model.name.NomenclaturalStatus; +import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType; +import eu.etaxonomy.cdm.model.name.Rank; +import eu.etaxonomy.cdm.model.name.TaxonName; +import eu.etaxonomy.cdm.model.name.TaxonNameFactory; +import eu.etaxonomy.cdm.model.reference.Reference; +import eu.etaxonomy.cdm.model.reference.ReferenceFactory; +import eu.etaxonomy.cdm.model.reference.ReferenceType; +import eu.etaxonomy.cdm.model.taxon.Classification; +import eu.etaxonomy.cdm.model.taxon.Synonym; +import eu.etaxonomy.cdm.model.taxon.SynonymType; +import eu.etaxonomy.cdm.model.taxon.Taxon; +import eu.etaxonomy.cdm.model.taxon.TaxonBase; +import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; + +/** + * Kew excel taxon import for Caryophyllaceae. 
+ *
+ * <p>The worksheet is processed in two passes: the first pass verifies or creates the
+ * names and creates the taxa and synonyms, the second pass adds the accepted taxa to
+ * the classification and sets the synonym and basionym relations.</p>
+ *
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+@Component
+public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
+        extends SimpleExcelTaxonImport<CONFIG>{
+
+    private static final long serialVersionUID = 1081966876789613803L;
+    private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
+
+    /** Returned by {@link #singleDiff(String, String)} if 2 values differ by more than 1 inserted or removed substring. */
+    private static final String NO_SIMPLE_DIFF = "xxxxx";
+
+    //uuids of fixed taxa that are looked up in the target database
+    private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
+    private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
+    private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
+
+    //column headers of the worksheet
+    private static final String CDM_Name_UUID = "CDM-Name_UUID";
+    private static final String Kew_Name_ID = "Kew-Name-ID";
+    private static final String Kew_Name_Citation = "Kew-Name-Citation";
+    private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
+    private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
+    private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
+    private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
+    private static final String GENUS_HYBRID = "genus_hybrid";
+    private static final String GENUS = "genus";
+    private static final String SPECIES_HYBRID = "species_hybrid";
+    private static final String SPECIES = "species";
+
+    private static final String infraspecific_rank = "infraspecific_rank";
+    private static final String infraspecies = "infraspecies";
+
+    private static final String parenthetical_author = "parenthetical_author";
+    private static final String primary_author = "primary_author";
+    private static final String publication_author = "publication_author";
+    private static final String place_of_publication = "place_of_publication";
+    private static final String volume_and_page = "volume_and_page";
+    private static final String KewYear4CDM = "KewYear4CDM";
+    private static final String PubTypeABSG = "PubTypeABSG";
+    private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
+
+    /**
+     * Pattern for a book section author: initials, optional "de"/"von" and the family name.
+     * The family name is available as named group "famname". Compiled once as the
+     * pattern does not depend on the record.
+     */
+    private static final Pattern PERSON_PATTERN = Pattern.compile(
+            "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)");
+
+    /** Kew name id -> uuid of the CDM name; filled in the first pass. */
+    private static final Map<String,UUID> nameMap = new HashMap<>();
+    /** Kew name id (all taxon bases) or name cache (accepted/unplaced taxa only) -> uuid of the taxon base; filled in the first pass. */
+    private static final Map<String,UUID> taxonMap = new HashMap<>();
+
+    private static final List<String> expectedKeys = Arrays.asList(
+            CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
+            Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
+            SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
+            parenthetical_author, primary_author, publication_author, place_of_publication,
+            volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
+    );
+
+    //the following 4 fields cache session bound objects and are reset whenever secondPass starts a new transaction
+    private Reference sourceReference;
+    private Reference secReference;
+    private Classification classification;
+    private Taxon orphanedSynonymTaxon;
+
+    private final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
+
+    /** Counts the records handled in the second pass, used to start a new transaction every 100 records. */
+    int c2 = 0;
+
+    /**
+     * First pass for a single record: warns about unexpected column headers and
+     * creates the name and the taxon or synonym for the record.
+     */
+    @Override
+    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
+
+        String line = getLine(state, 50);
+        System.out.println(line);
+        Map<String, String> record = state.getOriginalRecord();
+
+        Set<String> keys = record.keySet();
+        for (String key: keys) {
+            if (!expectedKeys.contains(key)){
+                logger.warn(line + "Unexpected Key: " + key);
+            }
+        }
+
+        makeTaxon(state, line, record);
+    }
+
+    /**
+     * Loads and verifies the existing name or creates a new name for the record,
+     * creates the taxon base for it and saves the taxon base.
+     */
+    private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
+        Reference sec = getSecReference(state, record);
+
+        //name
+        TaxonName name = getExistingName(state, line);
+        if (name != null){
+            verifyName(state, name, record, line, false);
+        }else{
+            name = createName(state, line);
+        }
+
+        //taxon
+        TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, name, sec);
+        if (taxonBase != null){
+            getTaxonService().saveOrUpdate(taxonBase);
+        }
+    }
+
+    /**
+     * Parses the Kew name citation of the current record into a new name, registers it
+     * under its Kew name id, adds the nomenclatural status and the original source,
+     * deduplicates authors and references and saves the name.
+     *
+     * @return the new name
+     */
+    private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        //parse
+        String fullTitle = getValue(state, Kew_Name_Citation);
+        String kewNameId = getValue(state, Kew_Name_ID);
+
+        fullTitle = replaceBookSectionAuthor(state, fullTitle);
+
+        TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
+        handleBookSectionAuthor(newName, state, line);
+
+        putName(kewNameId, newName.getUuid(), line);
+        //name status
+        makeNameStatus(line, state.getOriginalRecord(), newName);
+        verifyName(state, newName, state.getOriginalRecord(), line, true);
+        //deduplication
+        replaceNameAuthorsAndReferences(state, newName);
+        newName.addSource(makeOriginalSource(state));
+        getNameService().saveOrUpdate(newName);
+        return newName;
+    }
+
+    /**
+     * For records with publication type "BS" (book section) the parsed nomenclatural
+     * reference is used as the book: it gets the publication author of the record as author
+     * and is wrapped by a new book section which takes over the former authorship and the
+     * publication date and becomes the nomenclatural reference of the name.
+     */
+    private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        String type = getValue(state, PubTypeABSG);
+        if (!"BS".equals(type)){
+            return;
+        }
+        Reference book = newName.getNomenclaturalReference();
+        String pubAuthor = getValue(state, publication_author);
+        if (book != null && StringUtils.isNotEmpty(pubAuthor)){
+            TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
+            Reference bookSection = ReferenceFactory.newBookSection();
+            bookSection.setAuthorship(book.getAuthorship());
+            book.setAuthorship(bookAuthor);
+            bookSection.setInReference(book);
+            bookSection.setDatePublished(book.getDatePublished());
+            newName.setNomenclaturalReference(bookSection);
+        }else{
+            logger.warn(line + "unexpected booksection author handling");
+        }
+    }
+
+    /**
+     * Creates a person or, if the string contains more than 1 author separated by
+     * ", " or " & ", a team from the publication author string. A trailing
+     * " (ed.)" or " (eds.)" is not parsed but appended to a protected title cache.
+     *
+     * @param pubAuthor the publication author string, must not be <code>null</code>
+     * @param line the line prefix for log messages
+     */
+    private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
+        TeamOrPersonBase<?> result;
+        String ed = "";
+        if (pubAuthor.endsWith(" (ed.)")){
+            ed = " (ed.)";
+        }else if (pubAuthor.endsWith(" (eds.)")){
+            ed = " (eds.)";
+        }
+        pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
+        String[] splits = pubAuthor.split("(, | & )");
+        if (splits.length > 1){
+            Team team = Team.NewInstance();
+            result = team;
+            for (String split : splits){
+                if ("al.".equals(split.trim())){
+                    team.setHasMoreMembers(true);
+                }else{
+                    team.addTeamMember(getPerson(split, line));
+                }
+            }
+        }else{
+            result = getPerson(splits[0], line);
+        }
+        if (ed.length() > 0){
+            result.setTitleCache(result.getTitleCache() + ed, true);
+        }
+        return result;
+    }
+
+    /**
+     * Creates a new person from the given string. If the string matches
+     * {@link #PERSON_PATTERN} family name and initials are set, otherwise the
+     * string is used as protected title cache and a warning is logged.
+     */
+    private Person getPerson(String personStr, String line) {
+        Person result = Person.NewInstance();
+        Matcher matcher = PERSON_PATTERN.matcher(personStr);
+        if (matcher.matches()){
+            String famName = matcher.group("famname");
+            result.setFamilyName(famName);
+            //everything except for the family name is handled as initials
+            String initials = personStr.replace(famName,"").trim();
+            result.setInitials(initials);
+        }else{
+            result.setTitleCache(personStr, true);
+            logger.warn(line + "BookSection author could not be parsed: " + personStr);
+        }
+        return result;
+    }
+
+    /**
+     * Hook to adapt the name citation of book sections before parsing.
+     * Currently the citation is returned unchanged for all publication types.
+     */
+    private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
+        //TODO not yet implemented
+        return fullTitle;
+    }
+
+    /**
+     * Compares the atomized and the reference data of the name with the according
+     * fields of the record and reports differences via
+     * {@link #verifyField(String, Map, String, String, String, String, boolean)}.
+     *
+     * @param isNew <code>true</code> if the name was created by this import. In this case
+     *   nothing is compared if the name could not be parsed completely.
+     */
+    private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
+            Map<String, String> record, String line, boolean isNew) {
+        if (isNew){
+            boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
+            if (!parsed){
+                return;
+            }
+        }
+        String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
+        verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
+        verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
+        verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
+        String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
+        verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
+        String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
+        verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
+
+        //reference
+        Reference nomRef = taxonName.getNomenclaturalReference();
+        if (nomRef == null){
+            logger.warn(line + "no nom.ref. exists in existing name");
+            return;
+        }
+
+        //place of publication
+        Reference inRef = nomRef.getInReference();
+        boolean hasInRef = inRef != null;
+        boolean isSection = nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article;
+        String existingAbbrevTitle = hasInRef && isSection ? inRef.getAbbrevTitle() : nomRef.getAbbrevTitle();
+        String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
+        //author
+        String inRefAuthor = (!hasInRef || inRef.getAuthorship() == null) ? null : inRef.getAuthorship().getTitleCache();
+        verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
+        //vol and page
+        String existingVolume = getVolume(nomRef);
+        String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
+        verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
+        //year
+        verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
+        //pub type
+        verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
+    }
+
+    /**
+     * Returns the volume of the reference or, for book sections having an
+     * in-reference, the volume of the in-reference.
+     */
+    private String getVolume(Reference nomRef) {
+        Reference inRef = nomRef.getInReference();
+        Reference ref = nomRef.isBookSection() && inRef != null ? inRef : nomRef;
+        //TODO edition ("ed. n") and series part ("ser. n") are not yet included
+        return ref.getVolume();
+    }
+
+    /**
+     * @return "exAuthor ex author" using the nomenclatural title caches, only "author" if
+     *   exAuthor is <code>null</code>, <code>null</code> if author is <code>null</code>
+     */
+    private String authorAndExAuthor(TeamOrPersonBase<?> author, TeamOrPersonBase<?> exAuthor) {
+        if (author == null){
+            return null;
+        }
+        String exPart = exAuthor == null ? "" : exAuthor.getNomenclaturalTitleCache() + " ex ";
+        return exPart + author.getNomenclaturalTitleCache();
+    }
+
+    /**
+     * Removes a trailing ", nom. inval." or ", nom. illeg." from the full title cache.
+     */
+    private String replaceStatus(String fullTitleCache) {
+        return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
+    }
+
+    /**
+     * Returns the abbreviation used in the publication type column for the given
+     * reference type ("A", "B", "BS", "GEN"), the label of the type for all other
+     * types and <code>null</code> if no type is given.
+     */
+    private String abbrefRefType(ReferenceType type) {
+        if (type == null){
+            return null;
+        }else if (type == ReferenceType.Article){
+            return "A";
+        }else if (type == ReferenceType.Book){
+            return "B";
+        }else if (type == ReferenceType.BookSection){
+            return "BS";
+        }else if (type == ReferenceType.Generic){
+            return "GEN";
+        }else{
+            return type.getLabel();
+        }
+    }
+
+    /**
+     * Same as {@link #verifyField(String, Map, String, String, String, String, boolean)}
+     * with only 1 difference to suppress.
+     */
+    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
+        return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
+    }
+
+    /**
+     * Compares the value computed from the CDM name with the value of the given field in
+     * the record. If they are not equal both values are printed, except if the difference
+     * is the same as one of the 2 given differences (which were reported before). Differences
+     * that are not simple ({@link #NO_SIMPLE_DIFF}) are always printed.
+     *
+     * @param expectedValue the value taken from the CDM name
+     * @param noLogIf a difference that should not be reported again, may be <code>null</code>
+     * @param noLogIf2 a second difference that should not be reported again, may be <code>null</code>
+     * @param isNew if <code>true</code> the CDM value is labeled as "New", otherwise as "Existing"
+     * @return the difference as computed by {@link #singleDiff(String, String)}, the
+     *   empty string if both values are equal
+     */
+    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
+            String noLogIf, String noLogIf2, boolean isNew) {
+        String value = getValue(record, fieldName);
+        if (CdmUtils.nullSafeEqual(expectedValue, value)){
+            return "";
+        }
+        String diff = singleDiff(expectedValue, value);
+        String label = isNew ? "New " : "Existing";
+        if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
+            System.out.println(" " + line + fieldName + "\n "+label+": " + expectedValue + "\n Kew : " + value);
+        }
+        return diff;
+    }
+
+    /**
+     * Computes the difference of the 2 trimmed strings if they differ only by 1 substring
+     * which was inserted into ("+" prefix) or removed from ("-" prefix) the second string.
+     * If one of the strings is <code>null</code> the other string is returned.
+     *
+     * @return the difference or {@link #NO_SIMPLE_DIFF} if the difference is not that simple
+     */
+    private String singleDiff(String expectedValue, String value) {
+        if (expectedValue == null){
+            return CdmUtils.Nz(value);
+        }else if (value == null){
+            return CdmUtils.Nz(expectedValue);
+        }
+        expectedValue = expectedValue.trim();
+        value = value.trim();
+        String diff_ab = StringUtils.difference(expectedValue, value);
+        String diff_ba = StringUtils.difference(value, expectedValue);
+        if (diff_ab.endsWith(diff_ba)){
+            return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
+        }else if (diff_ba.endsWith(diff_ab)){
+            return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
+        }else{
+            return NO_SIMPLE_DIFF;
+        }
+    }
+
+    /**
+     * Loads the name with the CDM name uuid given in the current record and registers
+     * it under the Kew name id of the record.
+     *
+     * @return the name or <code>null</code> if the record has no CDM name uuid
+     *   or no name with this uuid exists
+     */
+    private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        String cdmNameUuid = getValue(state, CDM_Name_UUID);
+        String kewNameId = getValue(state, Kew_Name_ID);
+        if (StringUtils.isBlank(cdmNameUuid)){
+            return null;
+        }
+        TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
+        if (existingName == null){
+            return null;
+        }
+        putName(kewNameId, existingName.getUuid(), line);
+        return CdmBase.deproxy(existingName);
+    }
+
+    /**
+     * Registers the name uuid under the Kew name id and warns if the id was
+     * registered before (the former uuid is replaced).
+     */
+    private void putName(String kewNameId, UUID uuid, String line) {
+        UUID existingUuid = nameMap.put(kewNameId, uuid);
+        if (existingUuid != null){
+            logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
+        }
+    }
+
+    /**
+     * Adds the nomenclatural status given in the record ("Illegitimate" or "Invalid")
+     * to the name. Other non blank values are logged and ignored.
+     */
+    private void makeNameStatus(String line, Map<String, String> record,
+            TaxonName taxonName) {
+        String nameStatus = getValue(record, Kew_Nomencl_Status);
+        NomenclaturalStatusType status;
+        if (isBlank(nameStatus)){
+            status = null;
+        }else if ("Illegitimate".equals(nameStatus)){
+            status = NomenclaturalStatusType.ILLEGITIMATE();
+        }else if ("Invalid".equals(nameStatus)){
+            status = NomenclaturalStatusType.INVALID();
+        }else{
+            logger.warn(line + "Nom. status not recognized: " + nameStatus);
+            status = null;
+        }
+        if (status != null){
+            taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
+        }
+    }
+
+    /**
+     * Creates an accepted taxon ("Accepted", "Unplaced") or a synonym ("Synonym",
+     * "Artificial Hybrid") for the name depending on the taxonomic status in the record and
+     * registers it under the Kew name id. Taxa are additionally registered under their name
+     * cache as this is used for finding the parent taxon in the second pass. An unplaced
+     * taxon does not replace a taxon already registered for the same name cache.
+     *
+     * @return the new taxon base or <code>null</code> if the status is not handled
+     */
+    private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
+            Map<String, String> record, TaxonName taxonName, Reference sec) {
+
+        TaxonBase<?> taxonBase;
+        boolean isUnplaced = false;
+        String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
+
+        if ("Accepted".equals(taxStatusStr)){
+            taxonBase = Taxon.NewInstance(taxonName, sec);
+        }else if ("Synonym".equals(taxStatusStr) || "Artificial Hybrid".equals(taxStatusStr)){
+            //artificial hybrids are handled as synonyms
+            taxonBase = Synonym.NewInstance(taxonName, sec);
+        }else if ("Unplaced".equals(taxStatusStr)){
+            taxonBase = Taxon.NewInstance(taxonName, sec);
+            isUnplaced = true;
+        }else{
+            logger.warn(line + "Status not handled: " + taxStatusStr);
+            return null;
+        }
+        taxonBase.addSource(makeOriginalSource(state));
+        taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
+        if (taxonBase instanceof Taxon){
+            String nameCache = taxonBase.getName().getNameCache();
+            UUID existing = taxonMap.get(nameCache);
+            if (existing == null){
+                taxonMap.put(nameCache, taxonBase.getUuid());
+            }else if (!isUnplaced){
+                taxonMap.put(nameCache, taxonBase.getUuid());
+                System.out.println(" " + line + "There is more than 1 taxon with name: " + nameCache);
+            }
+        }
+        return taxonBase;
+    }
+
+    /**
+     * Second pass for a single record: adds taxa to the classification, adds synonyms to
+     * their accepted taxon (or to a placeholder taxon if the accepted taxon can not be
+     * found) and sets the basionym relation. Every 100 records the current transaction is
+     * committed and a new one is started.
+     */
+    @Override
+    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
+
+        String kewId = getValue(state, Kew_Name_ID) + ": ";
+        String line = " (line: " + state.getCurrentLine() + ")";
+        if (c2++ % 100 == 0){
+            this.commitTransaction(state.getTransactionStatus());
+            //cached objects belong to the old transaction
+            this.classification = null;
+            this.secReference = null;
+            this.sourceReference = null;
+            this.orphanedSynonymTaxon = null;
+            TransactionStatus tx = this.startTransaction();
+            state.setTransactionStatus(tx);
+            logger.info(line + "New transaction started.");
+        }
+        Map<String, String> record = state.getOriginalRecord();
+
+        Classification targetClassification = getClassification(state);
+        TaxonBase<?> taxonBase = getTaxon(record);
+        if (taxonBase == null){
+            //e.g. if the taxonomic status was not handled in the first pass
+            logger.warn(kewId + "No taxon exists for record. Relations not handled" + line);
+            return;
+        }
+        TaxonName taxonName = taxonBase.getName();
+
+        if (taxonBase.isInstanceOf(Taxon.class)){
+            Taxon parent = getParent(record, taxonName, line, kewId);
+            if (parent != null){
+                targetClassification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
+            }
+        }else if (taxonBase.isInstanceOf(Synonym.class)){
+            Taxon taxon = getAcceptedTaxon(record, line, kewId);
+            if (taxon == null){
+                logger.warn(kewId + "Accepted taxon not found: " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
+                taxon = getOrphanedSynonymTaxon(state);
+            }
+            taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
+        }else{
+            logger.warn("Unhandled");
+        }
+
+        String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
+        if (basionymId != null){
+            UUID basionymUuid = nameMap.get(basionymId);
+            TaxonName basionym = basionymUuid == null ? null : getNameService().find(basionymUuid);
+            if(basionym == null){
+                logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
+            }else{
+                taxonName.addBasionym(basionym);
+                taxonName.mergeHomotypicGroups(basionym);  //just in case this is not automatically done
+                //TODO
+                // adjustSynonymType(taxonBase, basionymTaxon, line);
+            }
+        }
+    }
+
+    /**
+     * Returns the placeholder taxon for synonyms without accepted taxon. If it does not yet
+     * exist it is created with the fixed uuid {@link #KEW_ORPHANED_PLACEHOLDER_TAXON} (so it
+     * is found again by later calls), added to the classification as child of the
+     * "unplaced" taxon and saved.
+     */
+    private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
+        if (orphanedSynonymTaxon != null){
+            return orphanedSynonymTaxon;
+        }
+        UUID uuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
+        Taxon placeholderTaxon = CdmBase.deproxy(getTaxonService().find(uuid), Taxon.class);
+        if (placeholderTaxon == null){
+            TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
+            placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
+            placeholderTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
+            placeholderTaxon.setUuid(uuid);
+            Taxon unplacedTaxon = CdmBase.deproxy(findTaxonBase(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
+            getClassification(state).addParentChild(unplacedTaxon, placeholderTaxon, null, null);
+            getTaxonService().saveOrUpdate(placeholderTaxon);
+        }
+        orphanedSynonymTaxon = placeholderTaxon;
+        return placeholderTaxon;
+    }
+
+    /**
+     * Returns the classification defined in the configuration. It is loaded
+     * only once per transaction.
+     */
+    private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
+        if (classification == null){
+            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
+        }
+        return classification;
+    }
+
+    /**
+     * Returns the accepted taxon for a record with status "Synonym" or "Artificial Hybrid"
+     * by using the related accepted name id of the record.
+     *
+     * @return the accepted taxon, <code>null</code> if it does not exist, if it is a
+     *   synonym itself or if the record has another status
+     */
+    private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
+        String statusStr = getValue(record, Kew_Taxonomic_Status);
+        if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
+            String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
+            TaxonBase<?> accBase = findTaxonBase(taxonMap.get(accKewId));
+            if (accBase == null){
+                logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
+                return null;
+            }else if (accBase.isInstanceOf(Synonym.class)){
+                logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
+                return null;
+            }else{
+                return CdmBase.deproxy(accBase, Taxon.class);
+            }
+        }else{
+            logger.warn(kewId + "Parent not retrieved" + line);
+            return null;
+        }
+    }
+
+    /**
+     * Returns the parent taxon for the taxon of the record: the "unplaced" taxon for
+     * unplaced taxa, for accepted taxa the taxon registered for the next higher name
+     * (see {@link #getHigherRankName(TaxonName)}) or, if no such taxon is registered, the
+     * "accepted" taxon.
+     *
+     * @return the parent or <code>null</code> if no parent is needed or could be retrieved
+     */
+    private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
+        String statusStr = getValue(record, Kew_Taxonomic_Status);
+        if ("Unplaced".equals(statusStr)){
+            return CdmBase.deproxy(findTaxonBase(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
+        }else if ("Artificial Hybrid".equals(statusStr)){
+            return null;  //hybrids are handled as synonyms now
+        }else if ("Accepted".equals(statusStr)){
+            String higherName = getHigherRankName(taxonName);
+            UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
+            if (parentTaxonUuid == null){
+                return CdmBase.deproxy(findTaxonBase(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
+            }
+            TaxonBase<?> parentBase = findTaxonBase(parentTaxonUuid);
+            if (parentBase == null){
+                return null;
+            } else if (parentBase.isInstanceOf(Taxon.class)){
+                return CdmBase.deproxy(parentBase, Taxon.class);
+            } else {
+                logger.warn(kewId + "Parent is synonym " + line);
+                return null;
+            }
+        }else if ("Synonym".equals(statusStr)){
+            //not relevant
+            return null;
+        }else{
+            logger.warn(kewId + "Parent not retrieved" + line);
+            return null;
+        }
+    }
+
+    /**
+     * @return the genus name for species, "genus epithet" for infraspecific names,
+     *   <code>null</code> for all other names
+     */
+    private String getHigherRankName(TaxonName taxonName) {
+        if (Rank.SPECIES().equals(taxonName.getRank())){
+            return taxonName.getGenusOrUninomial();
+        }else if (taxonName.isInfraSpecific()){
+            return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
+        }
+        return null;
+    }
+
+    /**
+     * Sets the synonym type to homotypic if one of the taxon bases is synonym of the other.
+     * Currently not called, see the TODO in the basionym handling of the second pass.
+     */
+    @SuppressWarnings("unused")
+    private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
+        adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
+        adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
+    }
+
+    /**
+     * Sets the synonym type of the second taxon base to homotypic if it is
+     * a synonym of the first taxon base.
+     */
+    private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
+        if (firstTaxon == null){
+            logger.warn(line + "first taxon is null for adjust synonym type");
+        }else if (secondTaxon == null){
+            logger.warn(line + "second taxon is null for adjust synonym type");
+        }else if (secondTaxon.isInstanceOf(Synonym.class)){
+            Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
+            if (firstTaxon.equals(syn.getAcceptedTaxon())){
+                syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
+            }
+        }
+    }
+
+    /**
+     * @return the taxon base registered in the first pass for the Kew name id
+     *   of the record, <code>null</code> if none was registered
+     */
+    protected TaxonBase<?> getTaxon(Map<String, String> record) {
+        String kew_name_id = getValue(record, Kew_Name_ID);
+        return findTaxonBase(taxonMap.get(kew_name_id));
+    }
+
+    /**
+     * Null safe lookup of a taxon base by uuid.
+     *
+     * @return the taxon base, <code>null</code> if the uuid is <code>null</code> or unknown
+     */
+    private TaxonBase<?> findTaxonBase(UUID uuid) {
+        return uuid == null ? null : getTaxonService().find(uuid);
+    }
+
+    /**
+     * Checks if name and nomenclatural reference were parsed completely (no protected
+     * caches) and, if nameStr is given, if the title cache equals nameStr.
+     * All problems are logged.
+     *
+     * @return <code>true</code> if no problem was found
+     */
+    private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
+        boolean result = true;
+        if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
+            logger.warn(line + "Name could not be parsed: " + fullName);
+            result = false;
+        }
+        Reference nomRef = name.getNomenclaturalReference();
+        if (nomRef != null && (nomRef.isProtectedTitleCache()
+                || nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
+            logger.warn(line + "Nom ref could not be parsed: " + fullName);
+            result = false;
+        }
+        if (nameStr != null && !name.getTitleCache().equals(nameStr)){
+            logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
+            result = false;
+        }
+        return result;
+    }
+
+    /**
+     * Returns the sec reference. It is loaded by the uuid given in the record when
+     * called for the first time within a transaction, later calls return the cached
+     * reference independent of the record.
+     *
+     * @return the sec reference, <code>null</code> if the record has no sec reference
+     *   uuid or no reference with this uuid exists
+     */
+    private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
+        if (this.secReference == null){
+            logger.warn("Load sec ref");
+            String secUuid = record.get(Sec_Ref_CDM_UUID);
+            if (StringUtils.isBlank(secUuid)){
+                logger.warn("No sec ref uuid given in record");
+                return null;
+            }
+            secReference = getReferenceService().load(UUID.fromString(secUuid));
+            if (this.secReference == null){
+                logger.warn("Sec ref is null");
+            }
+        }
+        return this.secReference;
+    }
+
+    /**
+     * @return the persistent version of the source reference of the configuration,
+     *   cached per transaction
+     */
+    private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
+        if (this.sourceReference == null){
+            this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
+        }
+        return this.sourceReference;
+    }
+
+    /**
+     * Returns the reference with the same uuid from the database. If it
+     * does not yet exist there the given reference is saved and returned.
+     */
+    private Reference getPersistentReference(Reference reference) {
+        Reference result = getReferenceService().find(reference.getUuid());
+        logger.warn("Loaded persistent reference: "+ reference.getUuid());
+        if (result == null){
+            logger.warn("Persistent reference is null: " + reference.getUuid());
+            result = reference;
+            getReferenceService().saveOrUpdate(result);
+        }
+        return result;
+    }
+
+    /**
+     * Replaces authors and nomenclatural reference of the name by already
+     * existing ones by using the deduplication helper of the state.
+     */
+    private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
+        state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
+    }
+
+    /**
+     * Creates the import source using the Kew name id of the current record as
+     * id in source and the source reference of the import as citation.
+     */
+    @Override
+    protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
+        String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
+        return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
+    }
+}
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java
new file mode 100644
index 00000000..81d95a03
--- /dev/null
+++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java
@@ -0,0 +1,64 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.caryo;
+
+import eu.etaxonomy.cdm.common.URI;
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
+import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
+import eu.etaxonomy.cdm.model.reference.Reference;
+
+/**
+ * Configurator for Kew excel taxon import for Caryophyllaceae.
+ *
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+public class KewExcelTaxonImportConfigurator
+        extends ExcelImportConfiguratorBase{
+
+    private static final long serialVersionUID = -1819917445326422841L;
+
+    //no transformer is used by this import
+    private static IInputTransformer defaultTransformer = null;
+
+    private Reference secReference;
+
+    /**
+     * Use {@link #NewInstance(URI, ICdmDataSource)} to create a configurator.
+     * The nomenclatural code is set to ICNAFP.
+     */
+    private KewExcelTaxonImportConfigurator(URI source, ICdmDataSource destination) {
+        super(source, destination, defaultTransformer);
+        this.setNomenclaturalCode(NomenclaturalCode.ICNAFP);
+        this.setSource(source);
+        this.setDestination(destination);
+    }
+
+    /**
+     * Factory method.
+     *
+     * @param source the URI of the excel file
+     * @param destination the CDM data source to import into
+     * @return the new configurator
+     */
+    public static KewExcelTaxonImportConfigurator NewInstance(URI source, ICdmDataSource destination) {
+        KewExcelTaxonImportConfigurator configurator = new KewExcelTaxonImportConfigurator(source, destination);
+        return configurator;
+    }
+
+    /**
+     * Registers {@link KewExcelTaxonImport} as the only io class of this import.
+     */
+    @SuppressWarnings("unchecked")
+    @Override
+    protected void makeIoClassList() {
+        ioClassList = new Class[]{ KewExcelTaxonImport.class };
+    }
+
+    /**
+     * @return a new import state bound to this configurator
+     */
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    @Override
+    public SimpleExcelTaxonImportState getNewState() {
+        SimpleExcelTaxonImportState newState = new SimpleExcelTaxonImportState<>(this);
+        return newState;
+    }
+
+    /**
+     * @param secReference the sec reference to store in this configurator
+     */
+    public void setSecReference(Reference secReference) {
+        this.secReference = secReference;
+    }
+
+    /**
+     * @return the sec reference stored in this configurator, may be <code>null</code>
+     */
+    public Reference getSecReference() {
+        return this.secReference;
+    }
+}
-- 
2.34.1