ref #9918 first version of caryophyllaceae import
authorAndreas Müller <a.mueller@bgbm.org>
Fri, 14 Jan 2022 23:56:15 +0000 (00:56 +0100)
committerAndreas Müller <a.mueller@bgbm.org>
Fri, 14 Jan 2022 23:56:15 +0000 (00:56 +0100)
app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java [new file with mode: 0644]
app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java [new file with mode: 0644]
app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java [new file with mode: 0644]

diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java b/app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java
new file mode 100644 (file)
index 0000000..3038476
--- /dev/null
@@ -0,0 +1,91 @@
+/**
+* Copyright (C) 2007 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.app.caryophyllales;
+
+import java.util.UUID;
+
+import org.apache.log4j.Logger;
+
+import eu.etaxonomy.cdm.app.berlinModelImport.SourceBase;
+import eu.etaxonomy.cdm.app.common.CdmDestinations;
+import eu.etaxonomy.cdm.common.URI;
+import eu.etaxonomy.cdm.database.DbSchemaValidation;
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.caryo.KewExcelTaxonImportConfigurator;
+import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
+import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
+import eu.etaxonomy.cdm.model.reference.Reference;
+import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
+
+/**
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+public class CaryophyllaceaeActivator extends SourceBase{
+
+       @SuppressWarnings("unused")
+    private static final Logger logger = Logger.getLogger(CaryophyllaceaeActivator.class);
+
+       //database validation status (create, update, validate ...)
+       static final DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
+       static final URI source = caryophyllaceae();
+
+
+//     static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
+       static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_caryo_spp();
+
+       //classification
+       static final UUID classificationUuid = UUID.fromString("9edc58b5-de3b-43aa-9f31-1ede7c009c2b");
+
+       //check - import
+       static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
+
+       //taxa
+       static final boolean doTaxa = true;
+
+       private void doImport(ICdmDataSource cdmDestination){
+
+               //make Source
+           KewExcelTaxonImportConfigurator config= KewExcelTaxonImportConfigurator.NewInstance(source, cdmDestination);
+               config.setClassificationUuid(classificationUuid);
+               config.setCheck(check);
+//             config.setDoTaxa(doTaxa);
+               config.setDbSchemaValidation(hbm2dll);
+               config.setSourceReferenceTitle("WCVP2CDM-Caryophyllaceae.xlsx");
+
+               CdmDefaultImport<KewExcelTaxonImportConfigurator> myImport = new CdmDefaultImport<>();
+
+               //...
+               if (true){
+                       System.out.println("Start import from ("+ source.toString() + ") ...");
+                       config.setSourceReference(getSourceReference(config.getSourceReferenceTitle()));
+                       myImport.invoke(config);
+                       System.out.println("End import from ("+ source.toString() + ")...");
+               }
+       }
+
+       private Reference getSourceReference(String string) {
+               Reference result = ReferenceFactory.newGeneric();
+               result.setTitleCache(string, true);
+               return result;
+       }
+
+
+       public static URI caryophyllaceae(){
+      String fileName = "WCVP2CDM-Caryophyllaceae.xlsx";
+      URI uri = URI.create("file:////BGBM-PESIHPC/Caryophyllales/" +  fileName);
+      return uri;
+       }
+
+       public static void main(String[] args) {
+               CaryophyllaceaeActivator me = new CaryophyllaceaeActivator();
+               me.doImport(cdmDestination);
+               System.exit(0);
+       }
+}
\ No newline at end of file
index 82d6de0f450b92b9ff2a23317df964b07190f4a6..607d55378fad69da168086c43ff7e528a92d7d05 100644 (file)
@@ -590,7 +590,7 @@ public class CdmDestinations {
     public static ICdmDataSource cdm_local_caryo_spp(){
         DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
         String cdmServer = "127.0.0.1";
-        String cdmDB = "cdm_caryo_spp";
+        String cdmDB = "cdm_local_caryophyllales_spp";
         String cdmUserName = "edit";
         return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
     }
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java
new file mode 100644 (file)
index 0000000..1638ed6
--- /dev/null
@@ -0,0 +1,672 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.caryo;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+import org.springframework.stereotype.Component;
+import org.springframework.transaction.TransactionStatus;
+
+import eu.etaxonomy.cdm.common.CdmUtils;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
+import eu.etaxonomy.cdm.model.agent.Person;
+import eu.etaxonomy.cdm.model.agent.Team;
+import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
+import eu.etaxonomy.cdm.model.common.CdmBase;
+import eu.etaxonomy.cdm.model.common.IdentifiableSource;
+import eu.etaxonomy.cdm.model.name.INonViralName;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
+import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
+import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
+import eu.etaxonomy.cdm.model.name.Rank;
+import eu.etaxonomy.cdm.model.name.TaxonName;
+import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
+import eu.etaxonomy.cdm.model.reference.Reference;
+import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
+import eu.etaxonomy.cdm.model.reference.ReferenceType;
+import eu.etaxonomy.cdm.model.taxon.Classification;
+import eu.etaxonomy.cdm.model.taxon.Synonym;
+import eu.etaxonomy.cdm.model.taxon.SynonymType;
+import eu.etaxonomy.cdm.model.taxon.Taxon;
+import eu.etaxonomy.cdm.model.taxon.TaxonBase;
+import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
+
+/**
+ * Kew excel taxon import for Caryophyllaceae.
+ *
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+@Component
+public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
+            extends SimpleExcelTaxonImport<CONFIG>{
+
+    private static final long serialVersionUID = 1081966876789613803L;
+    private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
+
+    private static final String NO_SIMPLE_DIFF = "xxxxx";
+
+    private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
+    private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
+    private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
+
+    private static final String CDM_Name_UUID = "CDM-Name_UUID";
+    private static final String Kew_Name_ID = "Kew-Name-ID";
+    private static final String Kew_Name_Citation = "Kew-Name-Citation";
+    private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
+    private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
+    private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
+    private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
+    private static final String GENUS_HYBRID = "genus_hybrid";
+    private static final String GENUS = "genus";
+    private static final String SPECIES_HYBRID = "species_hybrid";
+    private static final String SPECIES = "species";
+
+    private static final String infraspecific_rank = "infraspecific_rank";
+    private static final String infraspecies = "infraspecies";
+
+    private static final String parenthetical_author = "parenthetical_author";
+    private static final String primary_author = "primary_author";
+    private static final String publication_author = "publication_author";
+    private static final String place_of_publication = "place_of_publication";
+    private static final String volume_and_page = "volume_and_page";
+    private static final String KewYear4CDM = "KewYear4CDM";
+    private static final String PubTypeABSG = "PubTypeABSG";
+    private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
+
+    private static final Map<String, UUID> nameMap = new HashMap<>();
+    private static final Map<String, UUID> taxonMap = new HashMap<>();
+
+    private  static List<String> expectedKeys= Arrays.asList(new String[]{
+            CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
+            Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
+            SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
+            parenthetical_author, primary_author, publication_author, place_of_publication,
+            volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
+    });
+
+    private Reference sourceReference;
+    private Reference secReference;
+
+    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
+
+//    @Override
+//    protected String getWorksheetName(CONFIG config) {
+//        return "valid taxa names";
+//    }
+
+    @Override
+    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
+
+        String line = getLine(state, 50);
+        System.out.println(line);
+        Map<String, String> record = state.getOriginalRecord();
+
+        Set<String> keys = record.keySet();
+        for (String key: keys) {
+            if (! expectedKeys.contains(key)){
+                logger.warn(line + "Unexpected Key: " + key);
+            }
+        }
+
+        makeTaxon(state, line, record);
+    }
+
+    private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
+//        state.getTransactionStatus().flush();
+        Reference sec = getSecReference(state, record);
+
+        //name
+        TaxonName existingName = getExistingName(state, line);
+        if (existingName != null){
+            verifyName(state, existingName, record, line, false);
+        }else{
+            existingName = createName(state, line);
+        }
+
+        //taxon
+        TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
+
+        if (taxonBase != null){
+            getTaxonService().saveOrUpdate(taxonBase);
+        }
+
+        return;
+    }
+
+    private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        //parse
+        String fullTitle = getValue(state, Kew_Name_Citation);
+        String kewNameId = getValue(state, Kew_Name_ID);
+
+        fullTitle = replaceBookSectionAuthor(state, fullTitle);
+
+        TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
+        handleBookSectionAuthor(newName, state, line);
+
+        putName(kewNameId, newName.getUuid(), line);
+        //name status
+        makeNameStatus(line, state.getOriginalRecord(), newName);
+        verifyName(state, newName, state.getOriginalRecord(), line, true);
+        //deduplication
+        replaceNameAuthorsAndReferences(state, newName);
+        newName.addSource(makeOriginalSource(state));
+        getNameService().saveOrUpdate(newName);
+        //Kew-Nomencl-Status
+        return newName;
+    }
+
+    private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        String type = getValue(state, PubTypeABSG);
+        if ("BS".equals(type)){
+            Reference book = newName.getNomenclaturalReference();
+            String pubAuthor = getValue(state, publication_author);
+            if (book != null && StringUtils.isNotEmpty(pubAuthor)){
+                TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
+                Reference bookSection = ReferenceFactory.newBookSection();
+                bookSection.setAuthorship(book.getAuthorship());
+                book.setAuthorship(bookAuthor);
+                bookSection.setInReference(book);
+                bookSection.setDatePublished(book.getDatePublished());
+                newName.setNomenclaturalReference(bookSection);
+            }else{
+                logger.warn(line + "unexpected booksection author handling");
+            }
+        }
+    }
+
+    private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
+        TeamOrPersonBase<?> result;
+        String ed = "";
+        if (pubAuthor.endsWith(" (ed.)")){
+            ed = " (ed.)";
+        }else if (pubAuthor.endsWith(" (eds.)")){
+            ed = " (eds.)";
+        }
+        pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
+        String[] splits = pubAuthor.split("(, | & )");
+        if (splits.length > 1){
+            Team team = Team.NewInstance();
+            result = team;
+            for (String split : splits){
+                if ("al.".equals(split.trim())){
+                    team.setHasMoreMembers(true);
+                }else{
+                    team.addTeamMember(getPerson(split, line));
+                }
+            }
+        }else{
+            result = getPerson(splits[0], line);
+        }
+        if (ed.length() > 0){
+            result.setTitleCache(result.getTitleCache() + ed, true);
+        }
+        return result;
+    }
+
+    private Person getPerson(String personStr, String line) {
+        Person result = Person.NewInstance();
+        String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
+//        regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
+        Matcher matcher = Pattern.compile(regEx).matcher(personStr);
+        if (matcher.matches()){
+            String famName = matcher.group("famname");
+            result.setFamilyName(famName);
+            String initials = personStr.replace(famName,"").trim();
+            result.setInitials(initials);
+        }else{
+            result.setTitleCache(personStr, true);
+            logger.warn(line + "BookSection author could not be parsed: " +  personStr);
+        }
+        return result;
+    }
+
+    private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
+        String type = getValue(state, PubTypeABSG);
+        if ("BS".equals(type)){
+            String pubAuthor = getValue(state, publication_author);
+            int inIndex = fullTitle.indexOf(" in ");
+            int commaIndex = fullTitle.indexOf(", ");
+
+        }
+        return fullTitle;
+    }
+
+    private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
+            Map<String, String> record, String line, boolean isNew) {
+        if (isNew){
+            boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
+            if (!parsed){
+                return;
+            }
+        }
+        String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
+        verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
+        verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
+        verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
+        String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
+        verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
+        String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
+        verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
+
+        //reference
+        Reference nomRef = taxonName.getNomenclaturalReference();
+        if (nomRef == null){
+            logger.warn(line + "no nom.ref. exists in existing name");
+        }else{
+
+            //place of publication
+            boolean hasInRef = nomRef.getInReference() != null;
+            String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
+                    nomRef.getInReference().getAbbrevTitle() :
+                    nomRef.getAbbrevTitle();
+            String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
+            //author
+            String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
+            verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
+            //vol and page
+            String existingVolume = getVolume(nomRef);
+            String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
+            verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
+            //year
+            verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
+            //pub type
+            verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
+        }
+    }
+
+    private String getVolume(Reference nomRef) {
+        Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
+        String vol = ref.getVolume();
+        String edition = ref.getEdition();
+        if (StringUtils.isNotBlank(edition)){
+            edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
+        }
+        String series = ref.getSeriesPart();
+        if (StringUtils.isNotBlank(series)){
+            series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
+        }
+
+        return vol;
+    }
+
+    private boolean isNumber(String edition) {
+        try {
+            Integer.valueOf(edition);
+        } catch (NumberFormatException e) {
+            return false;
+        }
+        return true;
+    }
+
+    private String authorAndExAuthor(TeamOrPersonBase<?> author,
+            TeamOrPersonBase<?> exAuthor) {
+        return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
+                + author.getNomenclaturalTitleCache();
+    }
+
+    private String replaceStatus(String fullTitleCache) {
+        return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
+    }
+
+    private String abbrefRefType(ReferenceType type) {
+        return type == ReferenceType.Article ? "A" :
+            type == ReferenceType.Book ? "B" :
+            type == ReferenceType.BookSection ? "BS" :
+            type == ReferenceType.Generic ? "GEN" :
+            type.getLabel() ;
+    }
+
+    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
+        return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
+    }
+
+    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
+            String noLogIf, String noLogIf2, boolean isNew) {
+        String value = getValue(record, fieldName);
+        if (!CdmUtils.nullSafeEqual(expectedValue, value)){
+            String diff = singleDiff(expectedValue, value);
+            String label = isNew ? "New     " : "Existing";
+            if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
+                System.out.println("   " + line + fieldName + "\n        "+label+": " + expectedValue + "\n        Kew     : " + value);
+            }
+            return diff;
+        }else{
+            return "";
+        }
+    }
+
+    private String singleDiff(String expectedValue, String value) {
+        if (expectedValue == null){
+            return CdmUtils.Nz(value);
+        }else if (value == null){
+            return CdmUtils.Nz(expectedValue);
+        }
+        expectedValue = expectedValue.trim();
+        value = value.trim();
+        String diff_ab = StringUtils.difference(expectedValue, value);
+        String diff_ba = StringUtils.difference(value, expectedValue);
+        if (diff_ab.endsWith(diff_ba)){
+            return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
+        }else if (diff_ba.endsWith(diff_ab)){
+            return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
+        }else{
+            return NO_SIMPLE_DIFF;
+        }
+    }
+
+    private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
+        String cdmNameUuid = getValue(state, CDM_Name_UUID);
+        String kewNameId = getValue(state, Kew_Name_ID);
+        if (cdmNameUuid == null){
+            return null;
+        }
+        TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
+        if (existingName != null){
+            putName(kewNameId, existingName.getUuid(), line);
+            return CdmBase.deproxy(existingName);
+        }else{
+            return null;
+        }
+    }
+
+    private void putName(String kewNameId, UUID uuid, String line) {
+        UUID existingUuid = nameMap.put(kewNameId, uuid);
+        if (existingUuid != null){
+            logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
+        }
+    }
+
+
+    private void makeNameStatus(String line, Map<String, String> record,
+            TaxonName taxonName) {
+        String nameStatus = getValue(record, Kew_Nomencl_Status);
+        NomenclaturalStatusType status;
+        if (isBlank(nameStatus)){
+            status = null;
+        }else if ("Illegitimate".equals(nameStatus)){
+            status = NomenclaturalStatusType.ILLEGITIMATE();
+        }else if ("Invalid".equals(nameStatus)){
+            status = NomenclaturalStatusType.INVALID();
+        }else{
+            logger.warn(line + "Nom. status not recognized: " + nameStatus);
+            status = null;
+        }
+        if (status != null){
+            taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
+        }
+    }
+
+
+    private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
+            Map<String, String> record, TaxonName taxonName, Reference sec) {
+
+        TaxonBase<?> taxonBase;
+        boolean isUnplaced = false;
+        String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
+
+        if ("Accepted".equals(taxStatusStr)){
+            taxonBase = Taxon.NewInstance(taxonName, sec);
+        }else if ("Synonym".equals(taxStatusStr)){
+            taxonBase = Synonym.NewInstance(taxonName, sec);
+        }else if ("Artificial Hybrid".equals(taxStatusStr)){
+            taxonBase = Synonym.NewInstance(taxonName, sec);
+        }else if ("Unplaced".equals(taxStatusStr)){
+            taxonBase = Taxon.NewInstance(taxonName, sec);
+        }else{
+            logger.warn(line + "Status not handled: " + taxStatusStr);
+            return null;
+        }
+        taxonBase.addSource(makeOriginalSource(state));
+        taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
+        if (taxonBase instanceof Taxon){
+            UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
+            if (existing == null || !isUnplaced){
+                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
+            }else if (!isUnplaced){
+                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
+                System.out.println("  " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
+            }
+        }
+        return taxonBase;
+    }
+
+    int c2 = 0;
+    @Override
+    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
+
+        String kewId = getValue(state, Kew_Name_ID) + ": ";
+        String line = " (line: " + state.getCurrentLine() + ")";
+//        System.out.println(line);
+        if (c2++ % 100 == 0){
+            this.commitTransaction(state.getTransactionStatus());
+            this.classification = null;
+            this.secReference = null;
+            this.sourceReference = null;
+            TransactionStatus tx = this.startTransaction();
+            state.setTransactionStatus(tx);
+            logger.info(line + "New transaction started.");
+        }
+        Map<String, String> record = state.getOriginalRecord();
+
+        Classification classification = getClassification(state);
+        TaxonBase<?> taxonBase = getTaxon(record);
+        TaxonName taxonName = taxonBase.getName();
+
+        if (taxonBase.isInstanceOf(Taxon.class)){
+            Taxon parent = getParent(record, taxonName, line, kewId);
+            if (parent != null){
+                classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
+            }
+        }else if (taxonBase.isInstanceOf(Synonym.class)){
+            Taxon taxon = getAcceptedTaxon(record, line, kewId);
+            if (taxon == null){
+                logger.warn(kewId + "Accepted taxon not found: " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
+                taxon = getOrphanedSynonymTaxon(state);
+            }else{
+                taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
+            }
+        }else{
+            logger.warn("Unhandled");
+        }
+
+        String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
+        if (basionymId != null){
+            UUID basionymUuid = nameMap.get(basionymId);
+            TaxonName basionym = getNameService().find(basionymUuid);
+            if(basionym == null){
+                logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
+            }else{
+                taxonName.addBasionym(basionym);
+                taxonName.mergeHomotypicGroups(basionym);  //just in case this is not automatically done
+                //TODO
+                //          adjustSynonymType(taxonBase, basionymTaxon, line);
+            }
+        }
+
+    }
+
+    private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
+        UUID uuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
+        Taxon placeholderTaxon = CdmBase.deproxy(getTaxonService().find(uuid), Taxon.class);
+        if (placeholderTaxon == null){
+            TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
+            placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
+            placeholderTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
+            Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
+            getClassification(state).addParentChild(unplacedTaxon, placeholderTaxon, null, null);
+        }
+        return placeholderTaxon;
+    }
+
+    private Classification classification;
+    private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
+        if (classification == null){
+            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
+        }
+        return classification;
+    }
+
+    private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
+        String statusStr = getValue(record, Kew_Taxonomic_Status);
+        if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
+            String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
+            UUID accUuid = taxonMap.get(accKewId);
+            TaxonBase<?> accBase = getTaxonService().find(accUuid);
+            if (accBase == null){
+                logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
+                return null;
+            }else if (accBase.isInstanceOf(Synonym.class)){
+                logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
+                return null;
+            }else{
+                return CdmBase.deproxy(accBase, Taxon.class);
+            }
+        }else{
+            logger.warn(kewId + "Parent not retrieved" +  line);
+            return null;
+        }
+    }
+
+    private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
+        String statusStr = getValue(record, Kew_Taxonomic_Status);
+        if ("Unplaced".equals(statusStr)){
+            return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
+        }else if ("Artificial Hybrid".equals(statusStr)){
+            return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
+        }else if ("Accepted".equals(statusStr)){
+            String higherName = getHigherRankName(taxonName);
+            UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
+            if (parentTaxonUuid != null){
+                TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
+                if (parentBase == null){
+                    return null;
+                } else if (parentBase.isInstanceOf(Taxon.class)){
+                    Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
+                    return parentTaxon;
+                } else {
+                    logger.warn(kewId + "Parent is synonym " + line);
+                    return null;
+                }
+            }else{
+                return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
+            }
+        }else if ("Synonym".equals(statusStr)){
+            //not relevant
+            return null;
+        }else{
+            logger.warn(kewId + "Parent not retrieved" + line);
+            return null;
+        }
+    }
+
+    private String getHigherRankName(TaxonName taxonName) {
+        if (Rank.SPECIES().equals(taxonName.getRank())){
+            return taxonName.getGenusOrUninomial();
+        }else if (taxonName.isInfraSpecific()){
+            return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
+        }
+        return null;
+    }
+
+    private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
+        adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
+        adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
+    }
+
+    private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
+        if (firstTaxon == null){
+            logger.warn(line + "first taxon is null for adjust synonym type");
+        }else if (secondTaxon == null){
+            logger.warn(line + "second taxon is null for adjust synonym type");
+        }else if (secondTaxon.isInstanceOf(Synonym.class)){
+            Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
+            if (firstTaxon.equals(syn.getAcceptedTaxon())){
+                syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
+            }
+        }
+    }
+
+    protected TaxonBase<?> getTaxon(Map<String, String> record) {
+        String kew_name_id = getValue(record, Kew_Name_ID);
+        UUID taxonUuid = taxonMap.get(kew_name_id);
+        TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
+        return taxon;
+    }
+
+       private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
+               boolean result = true;
+           if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
+                       logger.warn(line + "Name could not be parsed: " + fullName);
+                       result = false;
+               }
+               Reference nomRef = name.getNomenclaturalReference();
+               if (nomRef != null && (nomRef.isProtectedTitleCache()
+                       || nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
+                   logger.warn(line + "Nom ref could not be parsed: " + fullName);
+            result = false;
+               }
+               if (nameStr != null && !name.getTitleCache().equals(nameStr)){
+            logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
+            result = false;
+        }
+               return result;
+       }
+
+    private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
+        if (this.secReference == null){
+            logger.warn("Load sec ref");
+            String secUuid = record.get(Sec_Ref_CDM_UUID);
+            secReference = getReferenceService().load(UUID.fromString(secUuid));
+            if (this.secReference == null){
+                logger.warn("Sec ref is null");
+            }
+        }
+        return this.secReference;
+    }
+
+    private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
+        if (this.sourceReference == null){
+            this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
+        }
+        return this.sourceReference;
+    }
+
+    private Reference getPersistentReference(Reference reference) {
+        Reference result = getReferenceService().find(reference.getUuid());
+        logger.warn("Loaded persistent reference: "+ reference.getUuid());
+        if (result == null){
+            logger.warn("Persistent reference is null: " + reference.getUuid());
+            result = reference;
+            getReferenceService().saveOrUpdate(result);
+        }
+        return result;
+    }
+
+    private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
+        state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
+    }
+
+
+    @Override
+    protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
+       String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
+        return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
+    }
+}
diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java
new file mode 100644 (file)
index 0000000..81d95a0
--- /dev/null
@@ -0,0 +1,64 @@
+/**
+* Copyright (C) 2016 EDIT
+* European Distributed Institute of Taxonomy
+* http://www.e-taxonomy.eu
+*
+* The contents of this file are subject to the Mozilla Public License Version 1.1
+* See LICENSE.TXT at the top of this package for the full license terms.
+*/
+package eu.etaxonomy.cdm.io.caryo;
+
+import eu.etaxonomy.cdm.common.URI;
+import eu.etaxonomy.cdm.database.ICdmDataSource;
+import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
+import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
+import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
+import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
+import eu.etaxonomy.cdm.model.reference.Reference;
+
+/**
+ * Configurator for Kew excel taxon import for Caryophyllaceae.
+ *
+ * @author a.mueller
+ * @since 05.01.2022
+ */
+public class KewExcelTaxonImportConfigurator
+        extends ExcelImportConfiguratorBase{
+
+    private static final long serialVersionUID = -1819917445326422841L;
+
+    private static IInputTransformer defaultTransformer = null;
+    private Reference secReference;
+
+    public static KewExcelTaxonImportConfigurator NewInstance(URI source, ICdmDataSource destination) {
+        return new KewExcelTaxonImportConfigurator(source, destination);
+    }
+
+    private KewExcelTaxonImportConfigurator(URI source, ICdmDataSource destination) {
+        super(source, destination, defaultTransformer);
+        setNomenclaturalCode(NomenclaturalCode.ICNAFP);
+        setSource(source);
+        setDestination(destination);
+     }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    @Override
+    public SimpleExcelTaxonImportState getNewState() {
+        return new SimpleExcelTaxonImportState<>(this);
+    }
+
+    @SuppressWarnings("unchecked")
+       @Override
+    protected void makeIoClassList() {
+        ioClassList = new Class[]{
+                KewExcelTaxonImport.class,
+        };
+    }
+
+    public Reference getSecReference() {
+        return secReference;
+    }
+    public void setSecReference(Reference secReference) {
+        this.secReference = secReference;
+    }
+}