ref #9359 remove hsqldb StringComparator from ErmsTaxonImport
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / erms / ErmsTaxonImport.java
index ca6bcde89190c90fd51de59b7840a74baea636d5..eb1a5f2cdd7de3db3a67c8924192d3ede5c82c90 100644 (file)
@@ -11,8 +11,10 @@ package eu.etaxonomy.cdm.io.pesi.erms;
 \r
 import java.sql.ResultSet;\r
 import java.sql.SQLException;\r
+import java.util.ArrayList;\r
 import java.util.HashMap;\r
 import java.util.HashSet;\r
+import java.util.List;\r
 import java.util.Map;\r
 import java.util.Set;\r
 import java.util.UUID;\r
@@ -21,21 +23,33 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.log4j.Logger;\r
 import org.springframework.stereotype.Component;\r
 \r
+import eu.etaxonomy.cdm.common.CdmUtils;\r
+import eu.etaxonomy.cdm.common.StringComparator;\r
+import eu.etaxonomy.cdm.io.common.DbImportStateBase;\r
 import eu.etaxonomy.cdm.io.common.IOValidator;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;\r
+import eu.etaxonomy.cdm.io.common.mapping.DbImportAnnotationMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportLsidMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportMarkerMapper;\r
+import eu.etaxonomy.cdm.io.common.mapping.DbImportMethodMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;\r
-import eu.etaxonomy.cdm.io.common.mapping.DbNotYetImplementedMapper;\r
 import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;\r
+import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;\r
+import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;\r
 import eu.etaxonomy.cdm.io.pesi.erms.validation.ErmsTaxonImportValidator;\r
 import eu.etaxonomy.cdm.io.pesi.out.PesiTaxonExport;\r
+import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;\r
+import eu.etaxonomy.cdm.model.common.AnnotationType;\r
 import eu.etaxonomy.cdm.model.common.CdmBase;\r
 import eu.etaxonomy.cdm.model.common.ExtensionType;\r
+import eu.etaxonomy.cdm.model.common.Language;\r
+import eu.etaxonomy.cdm.model.common.MarkerType;\r
+import eu.etaxonomy.cdm.model.common.RelationshipTermBase;\r
 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;\r
+import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;\r
 import eu.etaxonomy.cdm.model.name.Rank;\r
 import eu.etaxonomy.cdm.model.name.TaxonName;\r
 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;\r
@@ -43,9 +57,9 @@ import eu.etaxonomy.cdm.model.reference.Reference;
 import eu.etaxonomy.cdm.model.taxon.Synonym;\r
 import eu.etaxonomy.cdm.model.taxon.Taxon;\r
 import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
+import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;\r
 import eu.etaxonomy.cdm.strategy.cache.name.TaxonNameDefaultCacheStrategy;\r
 \r
-\r
 /**\r
  * @author a.mueller\r
  * @since 20.02.2010\r
@@ -58,68 +72,79 @@ public class ErmsTaxonImport
     private static final long serialVersionUID = -7111568277264140051L;\r
     private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);\r
 \r
-       public static final UUID TNS_EXT_UUID = UUID.fromString("41cb0450-ac84-4d73-905e-9c7773c23b05");\r
-\r
-       private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;\r
-\r
        private static final String pluralString = "taxa";\r
        private static final String dbTableName = "tu";\r
        private static final Class<?> cdmTargetClass = TaxonBase.class;\r
 \r
+       private static Map<String, Integer> unacceptReasons = new HashMap<>();\r
+\r
+       private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;\r
+\r
        public ErmsTaxonImport(){\r
                super(pluralString, dbTableName, cdmTargetClass);\r
        }\r
 \r
-//     @Override\r
-//     protected String getIdQuery() {\r
-//             String strQuery = " SELECT id FROM tu WHERE id < 300000 " ;\r
-//             return strQuery;\r
-//     }\r
-\r
+       @Override  //returns the query selecting the ids of all rows of table tu\r
+       protected String getIdQuery() {\r
+               String strQuery = " SELECT id FROM tu " ;  //no filter is applied here. The filter WHERE id NOT IN (147415) is currently NOT part of the query; it was meant to exclude Monera as it has no children and it is unclear what classification it has. In ERMS it is an alternative accepted name (in https://en.wikipedia.org/wiki/Monera it might be a super taxon to bacteria).\r
+               return strQuery;\r
+       }\r
 \r
        @Override\r
     protected DbImportMapping<ErmsImportState, ErmsImportConfigurator> getMapping() {\r
                if (mapping == null){\r
-                       mapping = new DbImportMapping<ErmsImportState, ErmsImportConfigurator>();\r
+                       mapping = new DbImportMapping<>();\r
 \r
                        mapping.addMapper(DbImportObjectCreationMapper.NewInstance(this, "id", TAXON_NAMESPACE)); //id + tu_status\r
-                       UUID tsnUuid = ErmsTransformer.uuidTsn;\r
                        mapping.addMapper(DbImportLsidMapper.NewInstance("GUID", "lsid"));\r
 \r
+                       UUID tsnUuid = ErmsTransformer.uuidExtTsn;\r
                        ExtensionType tsnExtType = getExtensionType(tsnUuid, "TSN", "TSN", "TSN");\r
                        mapping.addMapper(DbImportExtensionMapper.NewInstance("tsn", tsnExtType));\r
 //                     mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));\r
 \r
-                       ExtensionType displayNameExtType = getExtensionType(ErmsTransformer.uuidDisplayName, "display name", "display name", "display name");\r
+                       ExtensionType displayNameExtType = getExtensionType(ErmsTransformer.uuidExtDisplayName, "display name", "display name", "display name");\r
                        mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_displayname", displayNameExtType));\r
-                       ExtensionType fuzzyNameExtType = getExtensionType(ErmsTransformer.uuidFuzzyName, "fuzzy name", "fuzzy name", "fuzzy name");\r
-               //      mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameExtType));\r
-                       mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "(NonViralName)name.authorshipCache"));\r
+            //Ignore fuzzyName\r
+            //  ExtensionType fuzzyNameExtType = getExtensionType(ErmsTransformer.uuidExtFuzzyName, "fuzzy name", "fuzzy name", "fuzzy name");\r
+            //  mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameExtType));\r
+                       mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "name.authorshipCache"));\r
 \r
-                       ExtensionType fossilStatusExtType = getExtensionType(ErmsTransformer.uuidFossilStatus, "fossil status", "fossil status", "fos. stat.");\r
+                       ExtensionType fossilStatusExtType = getExtensionType(ErmsTransformer.uuidExtFossilStatus, "fossil status", "fossil status", "fos. stat.");\r
                        mapping.addMapper(DbImportExtensionMapper.NewInstance("fossil_name", fossilStatusExtType));\r
-//                     mapping.addMapper(DbImportExtensionTypeCreationMapper.NewInstance("fossil_name", EXTENSION_TYPE_NAMESPACE, "fossil_name", "fossil_name", "fossil_name"));\r
 \r
-                       ExtensionType unacceptExtType = getExtensionType(ErmsTransformer.uuidUnacceptReason, "unaccept reason", "unaccept reason", "reason");\r
+                       ExtensionType unacceptExtType = getExtensionType(ErmsTransformer.uuidExtUnacceptReason, "unaccept reason", "unaccept reason", "reason");\r
                        mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_unacceptreason", unacceptExtType));\r
 \r
-                       ExtensionType qualityStatusExtType = getExtensionType(ErmsTransformer.uuidQualityStatus, "quality status", "quality status", "quality status");\r
+                       ExtensionType qualityStatusExtType = getExtensionType(ErmsTransformer.uuidExtQualityStatus, "quality status", "quality status", "quality status");\r
                        mapping.addMapper(DbImportExtensionMapper.NewInstance("qualitystatus_name", qualityStatusExtType)); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor\r
 \r
+                       ExtensionType cacheCitationExtType = getExtensionType(PesiTransformer.uuidExtCacheCitation, "cache_citation", "quality status", "cache_citation");\r
+            mapping.addMapper(DbImportExtensionMapper.NewInstance("cache_citation", cacheCitationExtType));\r
+\r
+            //flags\r
                        mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_marine", ErmsTransformer.uuidMarkerMarine, "marine", "marine", "marine", null));\r
                        mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_brackish", ErmsTransformer.uuidMarkerBrackish, "brackish", "brackish", "brackish", null));\r
                        mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_fresh", ErmsTransformer.uuidMarkerFreshwater, "freshwater", "fresh", "fresh", null));\r
                        mapping.addMapper(DbImportMarkerMapper.NewInstance("tu_terrestrial", ErmsTransformer.uuidMarkerTerrestrial, "terrestrial", "terrestrial", "terrestrial", null));\r
 \r
+                       //last action, species expert\r
+                       ExtensionType speciesExpertNameExtType = getExtensionType(PesiTransformer.uuidExtSpeciesExpertName, "species expert name", "species expert name", "species expert name");\r
+            mapping.addMapper(DbImportExtensionMapper.NewInstance("ExpertName", speciesExpertNameExtType)); //according to sql script ExpertName maps to SpeciesExpertName in ERMS\r
+            AnnotationType lastActionDateType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastActionDate, "Last action date", "Last action date", null);\r
+                       mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastActionDate", lastActionDateType));\r
+            AnnotationType lastActionType = getAnnotationType(DbLastActionMapper.uuidAnnotationTypeLastAction, "Last action", "Last action", null);\r
+            MarkerType hasNoLastActionMarkerType = getMarkerType(DbLastActionMapper.uuidMarkerTypeHasNoLastAction, "has no last action", "No last action information available", "no last action");\r
+            mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastAction", lastActionType, hasNoLastActionMarkerType));\r
 \r
-//                     UUID hiddenUuid = ErmsTransformer.uuidHidden;\r
-//                     mapping.addMapper(DbImportMarkerCreationMapper.Mapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor\r
+            //MAN authorshipCache => appendedPhrase\r
+            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "appendedPhraseForMisapplications", ErmsImportState.class));\r
 \r
-                       //not yet implemented\r
-                       mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_sp", "included in rank/object creation"));\r
-                       mapping.addMapper(DbIgnoreMapper.NewInstance("cache_citation", "Needs check if this is needed in PESI"));\r
+            //titleCache compare\r
+            mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "testTitleCache", ErmsImportState.class));\r
 \r
                        //ignore\r
+            mapping.addMapper(DbIgnoreMapper.NewInstance("tu_sp", "included in rank/object creation, only needed for defining kingdom"));\r
                        mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));\r
 \r
                }\r
@@ -128,17 +153,23 @@ public class ErmsTaxonImport
 \r
        @Override\r
        protected String getRecordQuery(ErmsImportConfigurator config) {\r
-               String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, "\r
-                       + " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +\r
-                       " status.status_id as status_id, status.status_name, fossil.fossil_name, qualitystatus.qualitystatus_name";\r
+               String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, parent4.tu_name AS parent4name, " +  //names of up to 4 ancestors, used to build the epithets of the name\r
+                           " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +\r
+                           " status.status_id as status_id, status.status_name, fossil.fossil_name, qualitystatus.qualitystatus_name," +\r
+                           " s.sessiondate lastActionDate, a.action_name lastAction, s.ExpertName ";  //these column names are the ones read by the lastActionDate, lastAction and ExpertName mappers in getMapping()\r
                String strFrom = " FROM tu  LEFT OUTER JOIN  tu AS parent1 ON parent1.id = tu.tu_parent " +\r
                                " LEFT OUTER JOIN   tu AS parent2  ON parent2.id = parent1.tu_parent " +\r
                                " LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " +\r
-                               " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +\r
+                               " LEFT OUTER JOIN tu AS parent4 ON parent3.tu_parent = parent4.id " +\r
+                " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +\r
                                " LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +\r
-                               " LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id ";\r
+                               " LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id " +\r
+                               " LEFT OUTER JOIN tu_sessions ts ON ts.tu_id = tu.id " +  //NOTE(review): presumably 1:n, so one tu record can show up in several result rows - confirm\r
+                " LEFT OUTER JOIN [sessions] s ON s.id = ts.session_id " +\r
+                " LEFT OUTER JOIN actions a ON a.id = ts.action_id ";\r
                String strWhere = " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";\r
-               String strRecordQuery = strSelect + strFrom + strWhere;\r
+               String strOrderBy = " ORDER BY tu.id, s.sessiondate DESC, a.id DESC ";  //rows with the same tu.id are adjacent and the one with the latest session date comes first; ignoreRecord() skips the following rows having the same id\r
+               String strRecordQuery = strSelect + strFrom + strWhere + strOrderBy;\r
                return strRecordQuery;\r
        }\r
 \r
@@ -148,34 +179,48 @@ public class ErmsTaxonImport
 \r
                //first path\r
                super.doInvoke(state);\r
+               if(true){\r
+                   logUnacceptReasons();\r
+               }\r
                return;\r
        }\r
 \r
+    Integer lastTaxonId = null;  //id of the row passed to the previous ignoreRecord() call, null before the first call\r
+    @Override\r
+    protected boolean ignoreRecord(ResultSet rs) throws SQLException {\r
+        Integer id = rs.getInt("id");\r
+        boolean result = id.equals(lastTaxonId);  //true if this row has the same id as the row handled directly before (the record query is ordered by tu.id, so repeated ids are adjacent)\r
+        lastTaxonId = id;\r
+        return result;\r
+    }\r
+\r
        private Set<Integer> getAcceptedTaxaKeys(ErmsImportState state) {\r
-               Set<Integer> result = new HashSet<Integer>();\r
-               String parentCol = "tu_parent";\r
-               String accCol = " tu_acctaxon";\r
+               Set<Integer> result = new HashSet<>();\r
                String idCol = " id ";\r
                String tuFk = "tu_id";\r
-               String taxonTable = "tu";\r
                String vernacularsTable = "vernaculars";\r
                String distributionTable = "dr";\r
-               String sql = " SELECT DISTINCT %s FROM %s  " +\r
-                               " UNION  SELECT %s FROM %s WHERE %s is NULL" +\r
-                               " UNION  SELECT DISTINCT %s FROM %s " +\r
-                               " UNION  SELECT DISTINCT %s FROM %s " +\r
-                               " UNION  SELECT DISTINCT %s FROM %s ";\r
+               String notesTable = "notes";\r
+               String sql =\r
+                "          SELECT id FROM tu WHERE tu_accfinal is NULL" //id of taxa not having accepted taxon\r
+                + " UNION  SELECT DISTINCT tu_accfinal FROM tu "  //fk to accepted taxon (either the accepted taxon or the taxon itself, if accepted)\r
+                + " UNION  SELECT id FROM tu WHERE trim(tu.tu_unacceptreason) like 'misidentification' OR trim(tu.tu_unacceptreason) like 'misidentifications' OR "\r
+                            + " tu.tu_unacceptreason like 'misapplied %%name' OR "\r
+                            + " tu.tu_unacceptreason like '%%misapplication%%' OR "\r
+                            + " tu.tu_unacceptreason like 'incorrect identification%%'" //Misapplications, see ErmsTransformer.getSynonymRelationTypesByKey\r
+                + " UNION  SELECT syn.id FROM tu syn INNER JOIN tu acc ON syn.tu_accfinal = acc.id WHERE syn.id = acc.tu_parent AND acc.id <> syn.id "  //see also ErmsTaxonRelationImport.isAccepted, there are some autonyms being the accepted taxon of there own parents\r
+                + " UNION  SELECT DISTINCT %s FROM %s " //vernaculars\r
+                + " UNION  SELECT DISTINCT %s FROM %s "  //distributions\r
+                + " UNION  SELECT DISTINCT %s FROM %s ";  //notes\r
                sql = String.format(sql,\r
-                               parentCol, taxonTable,\r
-                               idCol, taxonTable, accCol,\r
-                               accCol, taxonTable,\r
-                               tuFk, vernacularsTable,\r
-                               tuFk, distributionTable);\r
+                       tuFk, vernacularsTable,\r
+                               tuFk, distributionTable,\r
+                               tuFk, notesTable);\r
                ResultSet rs = state.getConfig().getSource().getResultSet(sql);\r
                try {\r
                        while (rs.next()){\r
                                Integer id;\r
-                               id = rs.getInt(parentCol);\r
+                               id = rs.getInt(idCol.trim());\r
                                result.add(id);\r
                        }\r
                        return result;\r
@@ -187,37 +232,80 @@ public class ErmsTaxonImport
 \r
        @Override\r
        public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, ErmsImportState state) {\r
-//             String nameSpace;\r
-//             Class<?> cdmClass;\r
-//             Set<String> idSet;\r
-               Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();\r
-\r
-               try{\r
-//                             Set<String> referenceIdSet = new HashSet<>();\r
-                               while (rs.next()){\r
-       //                              handleForeignKey(rs, referenceIdSet, "PTRefFk");\r
-                               }\r
-\r
-                       //reference map\r
-//                     nameSpace = "Reference";\r
-//                     cdmClass = Reference.class;\r
-//                     Map<String, Person> referenceMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(Person.class, teamIdSet, nameSpace);\r
-//                     result.put(Reference.class, referenceMap);\r
-\r
-               } catch (SQLException e) {\r
-                       throw new RuntimeException(e);\r
-               }\r
+               //currently no referencing objects needed\r
+           Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();\r
                return result;\r
        }\r
 \r
        @Override\r
        public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException {\r
                int statusId = rs.getInt("status_id");\r
-//             Object accTaxonId = rs.getObject("tu_acctaxon");\r
                Integer meId = rs.getInt("id");\r
+               Integer accFinal = nullSafeInt(rs, "tu_accfinal");  //value of column tu_accfinal; presumably null if the column is NULL - see nullSafeInt\r
+\r
+        TaxonName taxonName = getTaxonName(rs, state);\r
+               fillTaxonName(taxonName, rs, state, meId);  //fills the name parts and the name cache from the record\r
 \r
-               String tuName = rs.getString("tu_name");\r
-               String displayName = rs.getString("tu_displayname");\r
+               //add original source for taxon name (taxon original source is added in mapper)\r
+               Reference citation = state.getTransactionalSourceReference();\r
+               addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);\r
+\r
+               TaxonBase<?> result;\r
+               //handle accepted <-> synonym, we create more accepted taxa as we need them within the tree or to attach factual data\r
+               if (state.getAcceptedTaxaKeys().contains(meId)){\r
+                       Taxon taxon = Taxon.NewInstance(taxonName, citation);\r
+                       if (statusId != 1){\r
+                               logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);\r
+                               boolean idsDiffer = accFinal != null && !meId.equals(accFinal);  //true if tu_accfinal is set and points to another record than this one\r
+                               handleNotAcceptedTaxonStatus(taxon, statusId, idsDiffer, accFinal == null, state, rs);\r
+                       }\r
+                       result = taxon;\r
+               }else{\r
+                       result = Synonym.NewInstance(taxonName, citation);\r
+                       //real synonyms (id <> tu_accfinal) are always handled as "synonym" or "pro parte synonym"\r
+//                     handleNotAcceptedTaxonStatus(result, statusId, state, rs);\r
+               }\r
+\r
+               handleNameStatus(result.getName(), rs, state);  //adds a nomenclatural status to the name if one can be derived from the record\r
+               return result;\r
+       }\r
+\r
+    private void handleNameStatus(TaxonName name, ResultSet rs, ErmsImportState state) throws SQLException {  //adds a nomenclatural status to the name, derived from tu_status or, if that gives none, from tu_unacceptreason\r
+        NomenclaturalStatusType nomStatus = null;\r
+        int tuStatus = rs.getInt("tu_status");\r
+        //the order is bottom up from the SQL script as their values are overridden from top to bottom\r
+        if (tuStatus == 8){\r
+            //species inquirenda\r
+            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusSpeciesInquirenda, "species inquirenda", "species inquirenda", null, Language.LATIN(), null);\r
+        }else if (tuStatus == 7){\r
+            //temporary name\r
+            nomStatus = getNomenclaturalStatusType(state, PesiTransformer.uuidNomStatusTemporaryName, "temporary name", "temporary name", null, Language.ENGLISH(), null);\r
+        }else if (tuStatus == 6){\r
+            //nomen dubium\r
+            nomStatus = NomenclaturalStatusType.DOUBTFUL();\r
+        }else if (tuStatus == 5){\r
+            //"alternate representation"\r
+            nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusAlternateRepresentation, "alternate representation", "alternate representation", null, Language.ENGLISH(), null);\r
+        }else if (tuStatus == 3){\r
+            //nomen nudum\r
+            nomStatus = NomenclaturalStatusType.NUDUM();\r
+        }\r
+        if (nomStatus == null){\r
+            //in the SQL script it is set first by unacceptreason and then overridden if one of the above tu_status values exists\r
+            String unacceptReason = rs.getString("tu_unacceptreason");\r
+            try {\r
+                nomStatus = state.getTransformer().getNomenclaturalStatusByKey(unacceptReason);\r
+            } catch (UndefinedTransformerMethodException e) {logger.warn("Unhandled method", e);  //pass the exception to the logger so the cause is not lost\r
+            }\r
+        }\r
+        if (nomStatus != null){\r
+            name.addStatus(nomStatus, null, null);\r
+        }\r
+    }\r
+\r
+    private TaxonName fillTaxonName(TaxonName taxonName, ResultSet rs, ErmsImportState state, Integer meId) throws SQLException {\r
+        String tuName = rs.getString("tu_name");\r
+               String displayName = rs.getString("tu_displayname").trim();\r
 \r
                String parent1Name = rs.getString("parent1name");\r
                Integer parent1Rank = rs.getInt("parent1rank");\r
@@ -226,9 +314,10 @@ public class ErmsTaxonImport
                Integer parent2Rank = rs.getInt("parent2rank");\r
 \r
                String parent3Name = rs.getString("parent3name");\r
-//             Integer parent3Rank = rs.getInt("parent3rank");\r
+               Integer parent3Rank = rs.getInt("parent3rank");\r
+\r
+           String parent4Name = rs.getString("parent4name");\r
 \r
-               TaxonName taxonName = getTaxonName(rs, state);\r
                //set epithets\r
                if (taxonName.isGenus() || taxonName.isSupraGeneric()){\r
                        taxonName.setGenusOrUninomial(tuName);\r
@@ -243,10 +332,15 @@ public class ErmsTaxonImport
                                handleException(parent1Rank, taxonName, displayName, meId);\r
                        }\r
                        taxonName.setInfraSpecificEpithet(tuName);\r
-                       taxonName.setSpecificEpithet(parent1Name);\r
-                       getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);\r
+                       if (parent1Rank > 220){  //parent is still infraspecific\r
+                           taxonName.setSpecificEpithet(parent2Name);\r
+                           getGenusAndInfraGenus(parent3Name, parent4Name, parent3Rank, taxonName);\r
+                       }else{\r
+                           //default\r
+                           taxonName.setSpecificEpithet(parent1Name);\r
+                           getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);\r
+                       }\r
                }else if (taxonName.getRank()== null){\r
-                       logger.warn("rank super domain still needs to be implemented. Used domain instead.");\r
                        if ("Biota".equalsIgnoreCase(tuName)){\r
                                Rank rank = Rank.DOMAIN();  //should be Superdomain\r
                                taxonName.setRank(rank);\r
@@ -256,66 +350,124 @@ public class ErmsTaxonImport
                                logger.warn(warning);\r
                                taxonName.setNameCache(tuName);\r
                        }\r
-\r
                }\r
+\r
                //e.g. Leucon [Platyhelminthes] ornatus\r
                if (containsBrackets(displayName)){\r
                        taxonName.setNameCache(displayName);\r
-                       logger.warn("Set name cache: " +  displayName + ";id =" + meId);\r
-               }\r
-\r
-               //add original source for taxon name (taxon original source is added in mapper\r
-               Reference citation = state.getTransactionalSourceReference();\r
-               addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);\r
-\r
-               //old: if (statusId == 1){\r
-               if (state.getAcceptedTaxaKeys().contains(meId)){\r
-                       Taxon result = Taxon.NewInstance(taxonName, citation);\r
-                       if (statusId != 1){\r
-                               logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);\r
-                               handleNotAcceptedTaxon(result, statusId, state, rs);\r
-                       }\r
-                       return result;\r
-               }else{\r
-                       return Synonym.NewInstance(taxonName, citation);\r
+                       logger.warn("Set name cache: " +  displayName + "; id =" + meId);\r
                }\r
+        if (!taxonName.getNameCache().equals(displayName) && !isErroneousSubgenus(taxonName, displayName)){\r
+            int pos = CdmUtils.diffIndex(taxonName.getNameCache(), displayName);\r
+            logger.warn("Computed name cache differs at "+pos+".\n Computed   : " + taxonName.getNameCache()+"\n DisplayName: " +displayName);\r
+            taxonName.setNameCache(displayName, true);\r
+        }\r
+               taxonName.getTitleCache();\r
+        return taxonName;\r
+    }\r
+\r
+    private static boolean isErroneousSubgenus(TaxonName taxonName, String displayName) {  //true for species names whose display name contains " subg. "; false for a null display name\r
+        //this is an error in ERMS formatting in v2019 for ICNafp names, that hopefully soon will be corrected\r
+        return (displayName != null && Rank.SPECIES().equals(taxonName.getRank()) && displayName.contains(" subg. "));  //null guard: testTitleCache may pass a null display name\r
+    }\r
+\r
+    @SuppressWarnings("unused")  //used by MethodMapper\r
+    private static TaxonBase<?> appendedPhraseForMisapplications(ResultSet rs, ErmsImportState state) throws SQLException{  //for misapplied names: moves the authorship of the name to the appended phrase of the taxon; also collects unhandled unaccept reasons\r
+        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);  //the object currently handled by the mapping\r
+        TaxonName taxonName = taxon.getName();\r
+        String unacceptreason = rs.getString("tu_unacceptreason");\r
+        RelationshipTermBase<?>[] rels = state.getTransformer().getSynonymRelationTypesByKey(unacceptreason, state);\r
+        if (rels[1]!= null && rels[1].equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR())){  //only the second array element is checked here\r
+            taxon.setAppendedPhrase(taxonName.getAuthorshipCache());\r
+            taxon.setSec(null);\r
+            taxonName.setAuthorshipCache(null, taxonName.isProtectedAuthorshipCache());  //remove the authorship from the name, the protected flag is kept as it is\r
+            //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship\r
+        }\r
+        if(state.getUnhandledUnacceptReason() != null){\r
+            //to handle it here is a workaround, as the real place where it is handled is DbImportSynonymMapper which is called in ErmsTaxonRelationImport but where it is difficult to aggregate logging data\r
+            addUnacceptReason(state.getUnhandledUnacceptReason());\r
+        }\r
+        return taxon;\r
+    }\r
+\r
+    private static void addUnacceptReason(String unhandledUnacceptReason) {\r
+        //counts the occurrences per lower-cased reason: the first occurrence is stored as 1, later ones increment the stored count\r
+        String key = unhandledUnacceptReason.toLowerCase();\r
+        boolean isNew = !unacceptReasons.containsKey(key);\r
+        Integer previous = unacceptReasons.get(key);\r
+        int updated = isNew ? 1 : previous + 1;\r
+        unacceptReasons.put(key, updated);\r
+    }\r
+\r
+    @SuppressWarnings("unused")  //used by MethodMapper\r
+    private static TaxonBase<?> testTitleCache(ResultSet rs, ErmsImportState state) throws SQLException{  //compares the computed title cache with the one expected from the source record and logs a warning if they differ\r
+        TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);  //the object currently handled by the mapping\r
+        TaxonName taxonName = taxon.getName();\r
+        String displayName = rs.getString("tu_displayname");\r
+        displayName = displayName == null ? null : displayName.trim();\r
+        String titleCache = taxonName.resetTitleCache(); //calling titleCache should always be kept to have a computed titleCache in the CDM DB.\r
+        titleCache = CdmUtils.concat(" ", titleCache, taxon.getAppendedPhrase());  //the appended phrase of the taxon is part of the compared string\r
+        String expectedTitleCache = getExpectedTitleCache(rs);  //NOTE(review): getExpectedTitleCache dereferences tu_displayname without a null check, although null is handled above - confirm the column is never NULL\r
+        //TODO check titleCache, but beware of autonyms\r
+        if (!titleCache.equals(expectedTitleCache) && !isErroneousSubgenus(taxonName, displayName)){\r
+            int pos = CdmUtils.diffIndex(titleCache, expectedTitleCache);\r
+            logger.warn("Computed title cache differs at "+pos+".\n Computed             : " + titleCache + "\n DisplayName+Authority: " + expectedTitleCache);\r
+            taxonName.setNameCache(displayName, true);  //on mismatch the display name replaces the name cache; NOTE(review): the second argument presumably protects the cache - confirm\r
+        }\r
+        return taxon;\r
+    }\r
+\r
+    //see also PesiErmsValidation.srcFullName()\r
+    private static String getExpectedTitleCache(ResultSet srcRs) throws SQLException {  //builds the expected title from tu_displayname and tu_authority of the source record\r
+        String result;\r
+        String epi = srcRs.getString("tu_name");\r
+        epi = " a" + epi;  //NOTE(review): the letter a in this prefix looks unintended (maybe only a blank was meant); with it the first branch below presumably matches rarely - confirm\r
+        String display = srcRs.getString("tu_displayname");  //NOTE(review): not null-checked, a NULL tu_displayname leads to a NullPointerException below\r
+        String sp = srcRs.getString("tu_sp");\r
+        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal; epi occurs more than once in the display name and tu_sp does not start with #2#\r
+            result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";  //authority is inserted after the first occurrence; NOTE(review): replaceFirst treats its first argument as a regular expression and the second as a replacement pattern - confirm that names and authorities contain no special characters\r
+        }else{\r
+            result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));  //default: display name followed by authority\r
+        }\r
+        return result.trim();\r
+    }\r
+\r
+    /**\r
+     * Stores the PESI status of a taxon that is not accepted, either as extension of type\r
+     * "PESI taxon status" or by marking the taxon as doubtful.\r
+     * <code>accIdNull</code> and <code>rs</code> are not evaluated here.\r
+     *\r
+     * @param taxon the taxon to handle\r
+     * @param statusId the status id of the source record\r
+     * @param idsDiffer if <code>true</code> the taxon is always handled as synonym\r
+     * @param state the import state used to retrieve the extension type\r
+     */\r
+    private void handleNotAcceptedTaxonStatus(Taxon taxon, int statusId, boolean idsDiffer, boolean accIdNull, ErmsImportState state, ResultSet rs) throws SQLException {\r
+        ExtensionType pesiStatusType = getExtensionType(state, ErmsTransformer.uuidPesiTaxonStatus, "PESI taxon status", "PESI taxon status", "status", null);\r
+\r
+        if (idsDiffer){\r
+            //if ids differ the taxon should always be an ordinary synonym, some synonyms need to be imported to CDM as Taxon because they have factual data attached, they use a concept relationship as synonym relationship\r
+            addPesiStatus(taxon, PesiTransformer.T_STATUS_SYNONYM, pesiStatusType);\r
+            return;\r
+        }\r
+\r
+        switch (statusId){\r
+            case 1:\r
+                //nothing to do, not expected to happen\r
+                break;\r
+            case 2:\r
+            case 3:\r
+            case 4:\r
+            case 5:\r
+            case 7:\r
+                //unaccepted, nomen nudum, alternate representation, temporary name       they have sometimes no tu_accfinal or are handled incorrect\r
+                //TODO discuss alternate representations, at the very end of the PESI export unaccepted taxa with relationship "is alternative name for" are set to status "accepted". Need to check if this is true for the PESI taxa too (do they have such a relationship?)\r
+                //Note: in SQL script, also the tu_unacceptreason was checked to be NOT LIKE '%syno%', this is not always correct and the few real synonyms should better data cleaned\r
+                addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);\r
+                break;\r
+            case 6:\r
+            case 8:\r
+            case 10:\r
+                //nomen dubium, taxon inquirendum, uncertain\r
+                taxon.setDoubtful(true);\r
+                break;\r
+            case 9:\r
+                //interim unpublished, we should better not yet publish, but will be probably accepted in future\r
+                addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);\r
+                break;\r
+            default:\r
+                logger.error("Unhandled statusId "+ statusId);\r
+                break;\r
+        }\r
        }\r
 \r
-       private void handleNotAcceptedTaxon(Taxon taxon, int statusId, ErmsImportState state, ResultSet rs) throws SQLException {\r
-               ExtensionType notAccExtensionType = getExtensionType(state, ErmsTransformer.uuidErmsTaxonStatus, "ERMS taxon status", "ERMS taxon status", "status", null);\r
-               String statusName = rs.getString("status_name");\r
+    /**\r
+     * Adds the given status, converted to its decimal string representation, as extension\r
+     * of the given extension type to the taxon.\r
+     *\r
+     * @param taxon the taxon to add the extension to\r
+     * @param status the numeric status value\r
+     * @param pesiStatusType the extension type used for the new extension\r
+     */\r
+    private void addPesiStatus(Taxon taxon, int status, ExtensionType pesiStatusType) {\r
+        taxon.addExtension(String.valueOf(status), pesiStatusType);\r
 \r
-               if (statusId > 1){\r
-                       taxon.addExtension(statusName, notAccExtensionType);\r
-               }\r
-       }\r
+    }\r
 \r
-       /**\r
-        * @param parent1Rank\r
-        * @param displayName\r
-        * @param taxonName\r
-        * @param meId\r
-        */\r
-       private void handleException(Integer parent1Rank, TaxonName taxonName, String displayName, Integer meId) {\r
-               logger.warn("Parent of infra specific taxon is higher than species. Used nameCache: " + displayName +  "; id=" + meId) ;\r
+    /**\r
+     * Logs a warning that the parent of an infraspecific taxon has a rank higher than species\r
+     * and uses the display name as name cache of the taxon name.\r
+     *\r
+     * @param parentRank rank of the parent, only used for the log message\r
+     * @param taxonName the name that receives the name cache\r
+     * @param displayName the display name to use as name cache\r
+     * @param meId id of the record, only used for the log message\r
+     */\r
+    private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {\r
+               logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName +  "; id=" + meId) ;\r
                taxonName.setNameCache(displayName);\r
        }\r
 \r
-       /**\r
-        * @param displayName\r
-        * @return\r
-        */\r
+       /**\r
+        * Checks whether the display name contains an opening square bracket.\r
+        *\r
+        * @param displayName the display name to check, must not be <code>null</code>\r
+        * @return <code>true</code> if "[" occurs anywhere in the display name\r
+        */\r
        private boolean containsBrackets(String displayName) {\r
                int index = displayName.indexOf("[");\r
                return (index > -1);\r
        }\r
 \r
-       /**\r
-        * @param parent1Name\r
-        * @param parent2Name\r
-        * @param parent1Rank\r
-        * @param taxonName\r
-        */\r
        private void getGenusAndInfraGenus(String parentName, String grandParentName, Integer parent1Rank, TaxonName taxonName) {\r
                if (parent1Rank <220 && parent1Rank > 180){\r
                        //parent is infrageneric\r
@@ -328,22 +480,16 @@ public class ErmsTaxonImport
 \r
        /**\r
         * Returns an empty Taxon Name instance according to the given rank and kingdom.\r
-        * @param rs\r
-        * @return\r
-        * @throws SQLException\r
         */\r
        private TaxonName getTaxonName(ResultSet rs, ErmsImportState state) throws SQLException {\r
            TaxonName result;\r
-               Integer kingdomId = parseKingdomId(rs);\r
+               int kingdomId = parseKingdomId(rs);\r
                Integer intRank = rs.getInt("tu_rank");\r
 \r
                NomenclaturalCode nc = ErmsTransformer.kingdomId2NomCode(kingdomId);\r
                Rank rank = null;\r
-               if (kingdomId != null){\r
-                       rank = state.getRank(intRank, kingdomId);\r
-               }else{\r
-                       logger.warn("KingdomId is null");\r
-               }\r
+               rank = state.getRank(intRank, kingdomId);\r
+\r
                if (rank == null){\r
                        logger.warn("Rank is null. KingdomId: " + kingdomId + ", rankId: " +  intRank);\r
                }\r
@@ -370,29 +516,63 @@ public class ErmsTaxonImport
         * @throws SQLException\r
         */\r
        private int parseKingdomId(ResultSet rs) throws SQLException {\r
-               Integer result = null;\r
                String treeString = rs.getString("tu_sp");\r
                if (treeString != null){\r
-                       if (StringUtils.isNotBlank(treeString) && treeString.length() > 1){\r
+                   if (StringUtils.isNotBlank(treeString) && treeString.length() > 1){\r
                                String strKingdom = treeString.substring(1,2);\r
 \r
                                if (! treeString.substring(0, 1).equals("#") && ! treeString.substring(2, 3).equals("#") ){\r
-                                       logger.warn("Tree string " + treeString + " has no recognized format");\r
+                                       String message = "Tree string " + treeString + " has no recognized format";\r
+                    logger.warn(message);\r
+                    throw new RuntimeException(message);\r
                                }else{\r
                                        try {\r
-                                               result = Integer.valueOf(strKingdom);\r
+                                               return Integer.valueOf(strKingdom);\r
                                        } catch (NumberFormatException e) {\r
-                                               logger.warn("Kingdom string " + strKingdom + "could not be recognized as a valid number");\r
+                                           String message = "Kingdom string " + strKingdom + "could not be recognized as a valid number";\r
+                                               logger.warn(message);\r
+                                               throw new RuntimeException(message);\r
                                        }\r
                                }\r
+                       }else{\r
+                String message = "Tree string for kingdom recognition is to short: " + treeString;\r
+                logger.warn(message);\r
+                throw new RuntimeException(message);\r
                        }\r
                }else{\r
-                       Integer tu_id = rs.getInt("id");\r
-                       result = tu_id;\r
+                       int tu_id = rs.getInt("id");\r
+                       return tu_id;\r
                }\r
-               return result;\r
        }\r
 \r
+    /**\r
+     * Logs all collected unaccept reasons as one warning. The reasons are grouped by their\r
+     * number of occurrences, highest number first, each group sorted with {@link StringComparator}.\r
+     * The entries are removed from <code>unacceptReasons</code> while the report is built,\r
+     * so the map is empty afterwards.\r
+     */\r
+    private void logUnacceptReasons() {\r
+        StringBuilder report = new StringBuilder("\n Unhandled unaccept reasons:\n===================");\r
+\r
+        while (!unacceptReasons.isEmpty()) {\r
+            //collect all reasons sharing the currently highest count\r
+            int highestCount = 0;\r
+            List<String> topReasons = new ArrayList<>();\r
+            for (Map.Entry<String, Integer> reasonCount : unacceptReasons.entrySet()) {\r
+                int count = reasonCount.getValue();\r
+                if (count < highestCount) {\r
+                    continue;  //neglect\r
+                }\r
+                if (count > highestCount) {\r
+                    topReasons = new ArrayList<>();\r
+                    highestCount = count;\r
+                }\r
+                topReasons.add(reasonCount.getKey());\r
+            }\r
+            topReasons.sort(new StringComparator());\r
+\r
+            report.append("\n   ").append(highestCount);\r
+            for (String reason : topReasons) {\r
+                report.append("\n   ").append(reason);\r
+                //handled entries are removed so the next round finds the next lower count\r
+                unacceptReasons.remove(reason);\r
+            }\r
+        }\r
+        logger.warn(report.toString());\r
+\r
+    }\r
+\r
        @Override\r
        protected boolean doCheck(ErmsImportState state){\r
                IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();\r