\r
import java.sql.ResultSet;\r
import java.sql.SQLException;\r
+import java.util.ArrayList;\r
import java.util.HashMap;\r
import java.util.HashSet;\r
+import java.util.List;\r
import java.util.Map;\r
import java.util.Set;\r
import java.util.UUID;\r
import org.springframework.stereotype.Component;\r
\r
import eu.etaxonomy.cdm.common.CdmUtils;\r
+import eu.etaxonomy.cdm.common.StringComparator;\r
import eu.etaxonomy.cdm.io.common.DbImportStateBase;\r
import eu.etaxonomy.cdm.io.common.IOValidator;\r
import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;\r
import eu.etaxonomy.cdm.io.common.mapping.DbImportMethodMapper;\r
import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;\r
import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;\r
-import eu.etaxonomy.cdm.io.common.mapping.DbNotYetImplementedMapper;\r
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;\r
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;\r
import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;\r
import eu.etaxonomy.cdm.model.common.ExtensionType;\r
import eu.etaxonomy.cdm.model.common.Language;\r
import eu.etaxonomy.cdm.model.common.MarkerType;\r
+import eu.etaxonomy.cdm.model.common.RelationshipTermBase;\r
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;\r
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;\r
import eu.etaxonomy.cdm.model.name.Rank;\r
import eu.etaxonomy.cdm.model.taxon.Synonym;\r
import eu.etaxonomy.cdm.model.taxon.Taxon;\r
import eu.etaxonomy.cdm.model.taxon.TaxonBase;\r
+import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;\r
import eu.etaxonomy.cdm.strategy.cache.name.TaxonNameDefaultCacheStrategy;\r
\r
/**\r
private static final long serialVersionUID = -7111568277264140051L;\r
private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);\r
\r
- private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;\r
-\r
private static final String pluralString = "taxa";\r
private static final String dbTableName = "tu";\r
private static final Class<?> cdmTargetClass = TaxonBase.class;\r
\r
+ private static Map<String, Integer> unacceptReasons = new HashMap<>();\r
+\r
+ private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping;\r
+\r
public ErmsTaxonImport(){\r
super(pluralString, dbTableName, cdmTargetClass);\r
}\r
MarkerType hasNoLastActionMarkerType = getMarkerType(DbLastActionMapper.uuidMarkerTypeHasNoLastAction, "has no last action", "No last action information available", "no last action");\r
mapping.addMapper(DbImportAnnotationMapper.NewInstance("lastAction", lastActionType, hasNoLastActionMarkerType));\r
\r
+ //MAN authorshipCache => appendedPhrase\r
+ mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "appendedPhraseForMisapplications", ErmsImportState.class));\r
+\r
//titleCache compare\r
mapping.addMapper(DbImportMethodMapper.NewDefaultInstance(this, "testTitleCache", ErmsImportState.class));\r
\r
- //not yet implemented\r
- mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_sp", "included in rank/object creation"));\r
-\r
//ignore\r
+ mapping.addMapper(DbIgnoreMapper.NewInstance("tu_sp", "included in rank/object creation, only needed for defining kingdom"));\r
mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));\r
\r
}\r
\r
//first path\r
super.doInvoke(state);\r
+ if(true){\r
+ logUnacceptReasons();\r
+ }\r
return;\r
}\r
\r
- Integer lastTaxonId = null;\r
+ Integer lastTaxonId = null;\r
@Override\r
protected boolean ignoreRecord(ResultSet rs) throws SQLException {\r
Integer id = rs.getInt("id");\r
String distributionTable = "dr";\r
String notesTable = "notes";\r
String sql =\r
- " SELECT id FROM tu WHERE tu_accfinal is NULL" //id of taxa not having accepted taxon\r
+ " SELECT id FROM tu WHERE tu_accfinal is NULL" //id of taxa not having accepted taxon\r
+ " UNION SELECT DISTINCT tu_accfinal FROM tu " //fk to accepted taxon (either the accepted taxon or the taxon itself, if accepted)\r
+ + " UNION SELECT id FROM tu WHERE trim(tu.tu_unacceptreason) like 'misidentification' OR trim(tu.tu_unacceptreason) like 'misidentifications' OR "\r
+ + " tu.tu_unacceptreason like 'misapplied %%name' OR "\r
+ + " tu.tu_unacceptreason like '%%misapplication%%' OR "\r
+ + " tu.tu_unacceptreason like 'incorrect identification%%'" //Misapplications, see ErmsTransformer.getSynonymRelationTypesByKey\r
+ " UNION SELECT syn.id FROM tu syn INNER JOIN tu acc ON syn.tu_accfinal = acc.id WHERE syn.id = acc.tu_parent AND acc.id <> syn.id " //see also ErmsTaxonRelationImport.isAccepted, there are some autonyms being the accepted taxon of there own parents\r
+ " UNION SELECT DISTINCT %s FROM %s " //vernaculars\r
- + " UNION SELECT DISTINCT %s FROM %s " //distributions\r
- + " UNION SELECT DISTINCT %s FROM %s "; //notes\r
+ + " UNION SELECT DISTINCT %s FROM %s " //distributions\r
+ + " UNION SELECT DISTINCT %s FROM %s "; //notes\r
sql = String.format(sql,\r
tuFk, vernacularsTable,\r
tuFk, distributionTable,\r
@Override\r
public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException {\r
int statusId = rs.getInt("status_id");\r
-// Object accTaxonId = rs.getObject("tu_accfinal");\r
Integer meId = rs.getInt("id");\r
+ Integer accFinal = nullSafeInt(rs, "tu_accfinal");\r
\r
TaxonName taxonName = getTaxonName(rs, state);\r
fillTaxonName(taxonName, rs, state, meId);\r
Taxon taxon = Taxon.NewInstance(taxonName, citation);\r
if (statusId != 1){\r
logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId);\r
- handleNotAcceptedTaxon(taxon, statusId, state, rs);\r
+ boolean idsDiffer = accFinal != null && !meId.equals(accFinal);\r
+ handleNotAcceptedTaxonStatus(taxon, statusId, idsDiffer, accFinal == null, state, rs);\r
}\r
result = taxon;\r
}else{\r
result = Synonym.NewInstance(taxonName, citation);\r
+ //real synonyms (id <> tu_accfinal) are always handled as "synonym" or "pro parte synonym"\r
+// handleNotAcceptedTaxonStatus(result, statusId, state, rs);\r
}\r
\r
handleNameStatus(result.getName(), rs, state);\r
private void handleNameStatus(TaxonName name, ResultSet rs, ErmsImportState state) throws SQLException {\r
NomenclaturalStatusType nomStatus = null;\r
int tuStatus = rs.getInt("tu_status");\r
- //the order is bottom up from SQL script as there values are overriden from top to bottom\r
+ //the order is bottom up from SQL script as their values are overridden from top to bottom\r
if (tuStatus == 8){\r
//species inquirenda\r
nomStatus = getNomenclaturalStatusType(state, ErmsTransformer.uuidNomStatusSpeciesInquirenda, "species inquirenda", "species inquirenda", null, Language.LATIN(), null);\r
taxonName.setNameCache(displayName);\r
logger.warn("Set name cache: " + displayName + "; id =" + meId);\r
}\r
- if (!taxonName.getNameCache().equals(displayName)){\r
+ if (!taxonName.getNameCache().equals(displayName) && !isErroneousSubgenus(taxonName, displayName)){\r
int pos = CdmUtils.diffIndex(taxonName.getNameCache(), displayName);\r
logger.warn("Computed name cache differs at "+pos+".\n Computed : " + taxonName.getNameCache()+"\n DisplayName: " +displayName);\r
taxonName.setNameCache(displayName, true);\r
return taxonName;\r
}\r
\r
+ /**
+  * Detects a known formatting error of ERMS (v2019) for ICNafp names: a name of rank
+  * species whose display name contains a subgenus part (" subg. ").
+  *
+  * @param taxonName the name whose rank is evaluated
+  * @param displayName the display name to check; may be <code>null</code>
+  * @return <code>true</code> if the rank is species and the display name contains " subg. ";
+  *         <code>false</code> otherwise, in particular if displayName is <code>null</code>
+  */
+ private static boolean isErroneousSubgenus(TaxonName taxonName, String displayName) {
+     //testTitleCache passes a display name that may be null, so guard before calling contains()
+     if (displayName == null){
+         return false;
+     }
+     //this is an error in ERMS formatting in v2019 for ICNafp names, that hopefully soon will be corrected
+     return Rank.SPECIES().equals(taxonName.getRank()) && displayName.contains(" subg. ");
+ }
+\r
+ /**
+  * For records whose tu_unacceptreason maps to a misapplied name relationship, moves the
+  * authorship cache of the name to the appended phrase of the taxon, removes the sec
+  * reference and empties the authorship cache of the name.
+  * Additionally counts the unhandled unaccept reason of the state, if there is one.
+  * Registered in the mapping via DbImportMethodMapper.
+  *
+  * @param rs the current record, tu_unacceptreason is read from it
+  * @param state the import state holding the current object and the transformer
+  * @return the current taxon
+  * @throws SQLException if reading tu_unacceptreason fails
+  */
+ @SuppressWarnings("unused")  //used by MethodMapper
+ private static TaxonBase<?> appendedPhraseForMisapplications(ResultSet rs, ErmsImportState state) throws SQLException{
+     TaxonBase<?> currentTaxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);
+     TaxonName name = currentTaxon.getName();
+     String reason = rs.getString("tu_unacceptreason");
+     RelationshipTermBase<?>[] relTypes = state.getTransformer().getSynonymRelationTypesByKey(reason, state);
+
+     //the second array element is the one compared to the misapplied name relationship type
+     RelationshipTermBase<?> secondRelType = relTypes[1];
+     boolean isMisapplication = secondRelType != null && secondRelType.equals(TaxonRelationshipType.MISAPPLIED_NAME_FOR());
+     if (isMisapplication){
+         String manAuthorship = name.getAuthorshipCache();
+         currentTaxon.setAppendedPhrase(manAuthorship);
+         currentTaxon.setSec(null);
+         name.setAuthorshipCache(null, name.isProtectedAuthorshipCache());
+         //TODO maybe some further authorship handling is needed if authors get parsed, but not very likely for MAN authorship
+     }
+
+     if (state.getUnhandledUnacceptReason() == null){
+         return currentTaxon;
+     }
+     //handling it here is a workaround: the real place where it is handled is DbImportSynonymMapper,
+     //which is called in ErmsTaxonRelationImport, but there it is difficult to aggregate logging data
+     addUnacceptReason(state.getUnhandledUnacceptReason());
+     return currentTaxon;
+ }
+\r
+ /**
+  * Increments the counter for the given unhandled unaccept reason.
+  * Reasons are counted case-insensitively, the lower case form is used as key.
+  *
+  * @param unhandledUnacceptReason the reason to count, must not be <code>null</code>
+  */
+ private static void addUnacceptReason(String unhandledUnacceptReason) {
+     //Locale.ROOT keeps the keys independent of the JVM default locale (e.g. Turkish dotless i)
+     String key = unhandledUnacceptReason.toLowerCase(java.util.Locale.ROOT);
+     //1 for the first occurrence, otherwise the previous count + 1
+     unacceptReasons.merge(key, 1, Integer::sum);
+ }
+\r
@SuppressWarnings("unused") //used by MethodMapper\r
private static TaxonBase<?> testTitleCache(ResultSet rs, ErmsImportState state) throws SQLException{\r
TaxonBase<?> taxon = (TaxonBase<?>)state.getRelatedObject(DbImportStateBase.CURRENT_OBJECT_NAMESPACE, DbImportStateBase.CURRENT_OBJECT_ID);\r
TaxonName taxonName = taxon.getName();\r
- String displayName = rs.getString("tu_displayname");\r
- displayName = displayName == null ? null : displayName.trim();\r
- String titleCache = taxonName.resetTitleCache(); //calling titleCache should always be kept to have a computed titleCache in the CDM DB.\r
- String expectedTitleCache = getExpectedTitleCache(rs);\r
- //TODO check titleCache, but beware of autonyms\r
- if (!titleCache.equals(expectedTitleCache)){\r
- int pos = CdmUtils.diffIndex(titleCache, expectedTitleCache);\r
- logger.warn("Computed title cache differs at "+pos+".\n Computed : " + titleCache + "\n DisplayName+Authority: " + expectedTitleCache);\r
- taxonName.setNameCache(displayName, true);\r
- }\r
- return taxon;\r
- }\r
-\r
- //see also PesiErmsValidation.srcFullName()\r
- private static String getExpectedTitleCache(ResultSet srcRs) throws SQLException {\r
+ String displayName = rs.getString("tu_displayname");\r
+ displayName = displayName == null ? null : displayName.trim();\r
+ String titleCache = taxonName.resetTitleCache(); //calling titleCache should always be kept to have a computed titleCache in the CDM DB.\r
+ titleCache = CdmUtils.concat(" ", titleCache, taxon.getAppendedPhrase());\r
+ String expectedTitleCache = getExpectedTitleCache(rs);\r
+ //TODO check titleCache, but beware of autonyms\r
+ if (!titleCache.equals(expectedTitleCache) && !isErroneousSubgenus(taxonName, displayName)){\r
+ int pos = CdmUtils.diffIndex(titleCache, expectedTitleCache);\r
+ logger.warn("Computed title cache differs at "+pos+".\n Computed : " + titleCache + "\n DisplayName+Authority: " + expectedTitleCache);\r
+ taxonName.setNameCache(displayName, true);\r
+ }\r
+ return taxon;\r
+ }\r
+\r
+ //see also PesiErmsValidation.srcFullName()\r
+ private static String getExpectedTitleCache(ResultSet srcRs) throws SQLException {\r
String result;\r
String epi = srcRs.getString("tu_name");\r
epi = " a" + epi;\r
return result.trim();\r
}\r
\r
- private void handleNotAcceptedTaxon(Taxon taxon, int statusId, ErmsImportState state, ResultSet rs) throws SQLException {\r
- ExtensionType notAccExtensionType = getExtensionType(state, ErmsTransformer.uuidErmsTaxonStatus, "ERMS taxon status", "ERMS taxon status", "status", null);\r
- String statusName = rs.getString("status_name");\r
-\r
- if (statusId > 1){\r
- taxon.addExtension(statusName, notAccExtensionType);\r
- }\r
+ /**
+  * Stores the status of a record that is imported as {@link Taxon} although its ERMS status
+  * is not "accepted", either as "PESI taxon status" extension or as doubtful flag.
+  *
+  * @param taxon the taxon to handle
+  * @param statusId the ERMS status id of the record
+  * @param idsDiffer <code>true</code> if the record has an accepted taxon id which differs from its own id
+  * @param accIdNull <code>true</code> if the record has no accepted taxon id; currently not evaluated
+  * @param state the import state, used to retrieve the extension type
+  * @param rs the current record; currently not evaluated
+  * @throws SQLException declared for result set access, though no column is read at the moment
+  */
+ private void handleNotAcceptedTaxonStatus(Taxon taxon, int statusId, boolean idsDiffer, boolean accIdNull, ErmsImportState state, ResultSet rs) throws SQLException {
+     ExtensionType pesiStatusType = getExtensionType(state, ErmsTransformer.uuidPesiTaxonStatus, "PESI taxon status", "PESI taxon status", "status", null);
+
+     if (idsDiffer){
+         //if ids differ the taxon should always be an ordinary synonym; some synonyms need to be imported
+         //to CDM as Taxon because they have factual data attached, they use a concept relationship as synonym relationship
+         addPesiStatus(taxon, PesiTransformer.T_STATUS_SYNONYM, pesiStatusType);
+         return;
+     }
+
+     switch (statusId){
+         case 1:
+             //nothing to do, not expected to happen
+             break;
+         case 2: case 3: case 4: case 5: case 7:
+             //unaccepted, nomen nudum, alternate representation, temporary name; they sometimes have no tu_accfinal or are handled incorrectly
+             //TODO discuss alternate representations, at the very end of the PESI export unaccepted taxa with relationship "is alternative name for" are set to status "accepted". Need to check if this is true for the PESI taxa too (do they have such a relationship?)
+             //Note: in SQL script, also the tu_unacceptreason was checked to be NOT LIKE '%syno%', this is not always correct and the few real synonyms should better be data cleaned
+             addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);
+             break;
+         case 6: case 8: case 10:
+             //nomen dubium, taxon inquirendum, uncertain
+             taxon.setDoubtful(true);
+             break;
+         case 9:
+             //interim unpublished, we should better not yet publish, but will be probably accepted in future
+             addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType);
+             break;
+         default:
+             logger.error("Unhandled statusId "+ statusId);
+     }
+ }
\r
- private void handleException(Integer parent1Rank, TaxonName taxonName, String displayName, Integer meId) {\r
- logger.warn("Parent of infra specific taxon is higher than species. Used nameCache: " + displayName + "; id=" + meId) ;\r
+ /**
+  * Adds the given status, converted to its decimal string representation, as extension
+  * of the given type to the taxon.
+  *
+  * @param taxon the taxon to add the extension to
+  * @param status the status value to store
+  * @param pesiStatusType the extension type to use
+  */
+ private void addPesiStatus(Taxon taxon, int status, ExtensionType pesiStatusType) {
+     String statusValue = Integer.toString(status);
+     taxon.addExtension(statusValue, pesiStatusType);
+ }
+\r
+ private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) {\r
+ logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName + "; id=" + meId) ;\r
taxonName.setNameCache(displayName);\r
}\r
\r
}\r
}\r
\r
+ private void logUnacceptReasons() {\r
+ String logStr = "\n Unhandled unaccept reasons:\n===================";\r
+\r
+ while (!unacceptReasons.isEmpty()) {\r
+ int n = 0;\r
+ List<String> mostUsedStrings = new ArrayList<>();\r
+ for (Map.Entry<String, Integer> entry : unacceptReasons.entrySet()) {\r
+ if (entry.getValue() > n) {\r
+ mostUsedStrings = new ArrayList<>();\r
+ mostUsedStrings.add(entry.getKey());\r
+ n = entry.getValue();\r
+ } else if (entry.getValue() == n) {\r
+ mostUsedStrings.add(entry.getKey());\r
+ } else {\r
+ //neglect\r
+ }\r
+ }\r
+ mostUsedStrings.sort(new StringComparator());\r
+ logStr += "\n " + String.valueOf(n);\r
+ for (String str : mostUsedStrings) {\r
+ logStr += "\n "+ str;\r
+ unacceptReasons.remove(str);\r
+ }\r
+ }\r
+ logger.warn(logStr);\r
+\r
+ }\r
+\r
@Override\r
protected boolean doCheck(ErmsImportState state){\r
IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();\r