Revision 3d6c7bfc
Added by Andreas Müller over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/validate/PesiErmsValidator.java | ||
---|---|---|
419 | 419 |
int taxonStatusFk = destRS.getInt("TaxonStatusFk"); |
420 | 420 |
String parentTaxonId = destRS.getString("parentTaxonFk"); |
421 | 421 |
int rankFk = destRS.getInt("RankFk"); |
422 |
if (taxonStatusFk == 2 || taxonStatusFk == 4 || taxonStatusFk == 7|| rankFk <= 10){ //synonym; pro parte syn; kingdom and higher
|
|
422 |
if (taxonStatusFk == 2 || taxonStatusFk == 4 || rankFk <= 10){ //synonym; pro parte syn; kingdom and higher |
|
423 | 423 |
result = isNull(childIndexAttr, destRS, id); |
424 | 424 |
}else{ |
425 | 425 |
String childIndex = destRS.getString(childIndexAttr); |
... | ... | |
850 | 850 |
return 5; |
851 | 851 |
}else if ("p".equals(sourceType)){ |
852 | 852 |
return 11; |
853 |
}else if ("i".equals(sourceType)){ |
|
854 |
return 12; |
|
855 | 853 |
} |
856 | 854 |
return null; |
857 | 855 |
} |
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTaxonImport.java | ||
---|---|---|
72 | 72 |
private static final long serialVersionUID = -7111568277264140051L; |
73 | 73 |
private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class); |
74 | 74 |
|
75 |
private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping; |
|
76 |
|
|
77 | 75 |
private static final String pluralString = "taxa"; |
78 | 76 |
private static final String dbTableName = "tu"; |
79 | 77 |
private static final Class<?> cdmTargetClass = TaxonBase.class; |
80 | 78 |
|
81 | 79 |
private static Map<String, Integer> unacceptReasons = new HashMap<>(); |
82 | 80 |
|
81 |
private DbImportMapping<ErmsImportState, ErmsImportConfigurator> mapping; |
|
82 |
|
|
83 | 83 |
public ErmsTaxonImport(){ |
84 | 84 |
super(pluralString, dbTableName, cdmTargetClass); |
85 | 85 |
} |
... | ... | |
240 | 240 |
@Override |
241 | 241 |
public TaxonBase<?> createObject(ResultSet rs, ErmsImportState state) throws SQLException { |
242 | 242 |
int statusId = rs.getInt("status_id"); |
243 |
// Object accTaxonId = rs.getObject("tu_accfinal"); |
|
244 | 243 |
Integer meId = rs.getInt("id"); |
244 |
Integer accFinal = nullSafeInt(rs, "tu_accfinal"); |
|
245 | 245 |
|
246 | 246 |
TaxonName taxonName = getTaxonName(rs, state); |
247 | 247 |
fillTaxonName(taxonName, rs, state, meId); |
... | ... | |
256 | 256 |
Taxon taxon = Taxon.NewInstance(taxonName, citation); |
257 | 257 |
if (statusId != 1){ |
258 | 258 |
logger.info("Taxon created as taxon but has status <> 1 ("+statusId+"): " + meId); |
259 |
handleNotAcceptedTaxon(taxon, statusId, state, rs); |
|
259 |
boolean idsDiffer = accFinal != null && !meId.equals(accFinal); |
|
260 |
handleNotAcceptedTaxonStatus(taxon, statusId, idsDiffer, accFinal == null, state, rs); |
|
260 | 261 |
} |
261 | 262 |
result = taxon; |
262 | 263 |
}else{ |
263 | 264 |
result = Synonym.NewInstance(taxonName, citation); |
265 |
//real synonyms (id <> tu_accfinal) are always handled as "synonym" or "pro parte synonym" |
|
266 |
// handleNotAcceptedTaxonStatus(result, statusId, state, rs); |
|
264 | 267 |
} |
265 | 268 |
|
266 | 269 |
handleNameStatus(result.getName(), rs, state); |
... | ... | |
429 | 432 |
return result.trim(); |
430 | 433 |
} |
431 | 434 |
|
432 |
private void handleNotAcceptedTaxon(Taxon taxon, int statusId, ErmsImportState state, ResultSet rs) throws SQLException { |
|
433 |
ExtensionType notAccExtensionType = getExtensionType(state, ErmsTransformer.uuidErmsTaxonStatus, "ERMS taxon status", "ERMS taxon status", "status", null); |
|
434 |
String statusName = rs.getString("status_name"); |
|
435 |
|
|
436 |
if (statusId > 1){ |
|
437 |
taxon.addExtension(statusName, notAccExtensionType); |
|
438 |
} |
|
435 |
private void handleNotAcceptedTaxonStatus(Taxon taxon, int statusId, boolean idsDiffer, boolean accIdNull, ErmsImportState state, ResultSet rs) throws SQLException { |
|
436 |
ExtensionType pesiStatusType = getExtensionType(state, ErmsTransformer.uuidPesiTaxonStatus, "PESI taxon status", "PESI taxon status", "status", null); |
|
437 |
|
|
438 |
if(idsDiffer){ |
|
439 |
//if ids differ the taxon should always be an ordinary synonym; some synonyms need to be imported to CDM as Taxon because they have factual data attached, and these use a concept relationship as synonym relationship |
|
440 |
addPesiStatus(taxon, PesiTransformer.T_STATUS_SYNONYM, pesiStatusType); |
|
441 |
}else if(statusId == 1){ |
|
442 |
//nothing to do, not expected to happen |
|
443 |
}else if (statusId > 1 && statusId < 6 || statusId == 7){ //unaccepted, nomen nudum, alternate representation, temporary name they have sometimes no tu_accfinal or are handled incorrect |
|
444 |
//TODO discuss alternate representations, at the very end of the PESI export unaccepted taxa with relationship "is alternative name for" are set to status "accepted". Need to check if this is true for the PESI taxa too (do they have such a relationship?) |
|
445 |
//Note: in the SQL script, tu_unacceptreason was also checked to be NOT LIKE '%syno%'; this is not always correct, and the few real synonyms should rather be fixed by data cleaning |
|
446 |
addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType); |
|
447 |
}else if (statusId == 6 || statusId == 8 || statusId == 10){ |
|
448 |
taxon.setDoubtful(true); //nomen dubium, taxon inquirendum, uncertain |
|
449 |
}else if (statusId == 9){ |
|
450 |
addPesiStatus(taxon, PesiTransformer.T_STATUS_UNACCEPTED, pesiStatusType); //interim unpublished, we should better not yet publish, but will be probably accepted in future |
|
451 |
}else{ |
|
452 |
logger.error("Unhandled statusId "+ statusId); |
|
453 |
} |
|
439 | 454 |
} |
440 | 455 |
|
441 |
private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) { |
|
456 |
private void addPesiStatus(Taxon taxon, int status, ExtensionType pesiStatusType) { |
|
457 |
taxon.addExtension(String.valueOf(status), pesiStatusType); |
|
458 |
|
|
459 |
} |
|
460 |
|
|
461 |
private void handleException(Integer parentRank, TaxonName taxonName, String displayName, Integer meId) { |
|
442 | 462 |
logger.warn("Parent of infra specific taxon is of higher rank ("+parentRank+") than species. Used nameCache: " + displayName + "; id=" + meId) ; |
443 | 463 |
taxonName.setNameCache(displayName); |
444 | 464 |
} |
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/ErmsTransformer.java | ||
---|---|---|
249 | 249 |
public static final UUID uuidBiology = UUID.fromString("af5c6832-74f3-4b87-bac9-6fdfc68ffada"); |
250 | 250 |
|
251 | 251 |
//extension type uuids |
252 |
public static final UUID uuidErmsTaxonStatus = UUID.fromString("859eee7f-5240-48a0-8edc-7af63557fa6e");
|
|
252 |
public static final UUID uuidPesiTaxonStatus = UUID.fromString("859eee7f-5240-48a0-8edc-7af63557fa6e");
|
|
253 | 253 |
public static final UUID uuidExtGazetteer = UUID.fromString("dcfa124a-1028-49cd-aea5-fdf9bd396c1a"); |
254 | 254 |
public static final UUID uuidExtImis = UUID.fromString("ee2ac2ca-b60c-4e6f-9cad-720fcdb0a6ae"); |
255 | 255 |
public static final UUID uuidExtFossilStatus = UUID.fromString("ec3dffbe-a0c8-4d76-845f-5fc166a33d5b"); |
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiTransformer.java | ||
---|---|---|
1898 | 1898 |
public static Integer taxonBase2statusFk (TaxonBase<?> taxonBase){ |
1899 | 1899 |
if (taxonBase == null){ |
1900 | 1900 |
return null; |
1901 |
} |
|
1902 |
if (taxonBase.isInstanceOf(Taxon.class)){ |
|
1903 |
Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class); |
|
1904 |
Set<TaxonRelationship> rels = taxon.getRelationsFromThisTaxon(); |
|
1905 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
1906 |
if (!rels.isEmpty() && !nodes.isEmpty()){ |
|
1907 |
logger.warn("Taxon has relations and parent. This is not expected in E+M, but maybe possible in ERMS. Check if taxon status is correct."); |
|
1908 |
}else if (rels.isEmpty() && nodes.isEmpty()){ |
|
1909 |
logger.warn("Taxon has neither relations nor parent. This is not expected. Check if taxon status is correct."); |
|
1910 |
} |
|
1911 |
if (!rels.isEmpty()){ |
|
1912 |
//we expect all rels to have same type, maybe not true |
|
1913 |
UUID relTypeUuid = rels.iterator().next().getType().getUuid(); |
|
1914 |
//E+M |
|
1915 |
if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){ |
|
1916 |
return T_STATUS_PRO_PARTE_SYN; |
|
1917 |
}else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){ |
|
1918 |
return T_STATUS_PARTIAL_SYN; |
|
1919 |
}else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){ |
|
1920 |
return T_STATUS_SYNONYM; //no explicit MAN status exists in PESI |
|
1921 |
} |
|
1922 |
//ERMS |
|
1923 |
else if (TaxonRelationshipType.pseudoTaxonUuids().contains(relTypeUuid)){ |
|
1924 |
return T_STATUS_SYNONYM; |
|
1925 |
} |
|
1926 |
} |
|
1927 |
if (!nodes.isEmpty()){ |
|
1928 |
TaxonNode parentNode = nodes.iterator().next().getParent(); |
|
1929 |
if (parentNode.getTaxon() != null && !parentNode.getTaxon().isPublish()){ |
|
1930 |
if (parentNode.getTaxon().getUuid().equals(uuidTaxonValuelessEuroMed) ){ |
|
1931 |
return T_STATUS_NOT_ACCEPTED_VALUELESS; |
|
1932 |
} |
|
1933 |
}else{ |
|
1934 |
return T_STATUS_ACCEPTED; |
|
1935 |
} |
|
1936 |
} |
|
1937 |
logger.error("Taxon status could not be defined. This should not happen: " + taxonBase.getTitleCache() ); |
|
1938 |
return T_STATUS_UNRESOLVED; |
|
1939 |
}else if (taxonBase.isInstanceOf(Synonym.class)){ |
|
1940 |
Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class); |
|
1941 |
if (taxonBase2statusFk(synonym.getAcceptedTaxon())== T_STATUS_NOT_ACCEPTED_VALUELESS ){ |
|
1942 |
return T_STATUS_NOT_ACCEPTED_VALUELESS; |
|
1943 |
}else{ |
|
1944 |
return T_STATUS_SYNONYM; |
|
1945 |
} |
|
1901 |
}else if(!taxonBase.getExtensions(ErmsTransformer.uuidPesiTaxonStatus).isEmpty()){ |
|
1902 |
return taxonStatusByErmsStatus(taxonBase.getExtensions(ErmsTransformer.uuidPesiTaxonStatus)); |
|
1946 | 1903 |
}else{ |
1947 |
logger.warn("Unresolved taxon status."); |
|
1948 |
return T_STATUS_UNRESOLVED; |
|
1904 |
if (taxonBase.isInstanceOf(Synonym.class)){ |
|
1905 |
Synonym synonym = CdmBase.deproxy(taxonBase, Synonym.class); |
|
1906 |
if (taxonBase2statusFk(synonym.getAcceptedTaxon())== T_STATUS_NOT_ACCEPTED_VALUELESS ){ |
|
1907 |
return T_STATUS_NOT_ACCEPTED_VALUELESS; |
|
1908 |
}else{ |
|
1909 |
return T_STATUS_SYNONYM; |
|
1910 |
} |
|
1911 |
}else if (taxonBase.isInstanceOf(Taxon.class)){ |
|
1912 |
Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class); |
|
1913 |
Set<TaxonRelationship> rels = taxon.getRelationsFromThisTaxon(); |
|
1914 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
1915 |
if (!rels.isEmpty() && !nodes.isEmpty()){ |
|
1916 |
logger.warn("Taxon has relations and parent. This is not expected in E+M, but maybe possible in ERMS. Check if taxon status is correct."); |
|
1917 |
}else if (rels.isEmpty() && nodes.isEmpty()){ |
|
1918 |
logger.warn("Taxon has neither relations nor parent. This is not expected. Check if taxon status is correct."); |
|
1919 |
} |
|
1920 |
if (!rels.isEmpty()){ |
|
1921 |
//we expect all rels to have same type, maybe not true |
|
1922 |
UUID relTypeUuid = rels.iterator().next().getType().getUuid(); |
|
1923 |
//E+M |
|
1924 |
if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){ |
|
1925 |
return T_STATUS_PRO_PARTE_SYN; |
|
1926 |
}else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){ |
|
1927 |
return T_STATUS_PARTIAL_SYN; |
|
1928 |
}else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){ |
|
1929 |
return T_STATUS_SYNONYM; //no explicit MAN status exists in PESI |
|
1930 |
} |
|
1931 |
//ERMS |
|
1932 |
else if (TaxonRelationshipType.pseudoTaxonUuids().contains(relTypeUuid)){ |
|
1933 |
return T_STATUS_SYNONYM; |
|
1934 |
} |
|
1935 |
} |
|
1936 |
if (!nodes.isEmpty()){ |
|
1937 |
TaxonNode parentNode = nodes.iterator().next().getParent(); |
|
1938 |
if (parentNode.getTaxon() != null && !parentNode.getTaxon().isPublish()){ |
|
1939 |
if (parentNode.getTaxon().getUuid().equals(uuidTaxonValuelessEuroMed) ){ |
|
1940 |
return T_STATUS_NOT_ACCEPTED_VALUELESS; |
|
1941 |
} |
|
1942 |
}else{ |
|
1943 |
if (taxon.isDoubtful()){ |
|
1944 |
return T_STATUS_UNRESOLVED; |
|
1945 |
}else{ |
|
1946 |
return T_STATUS_ACCEPTED; |
|
1947 |
} |
|
1948 |
} |
|
1949 |
} |
|
1950 |
logger.error("Taxon status could not be defined. This should not happen: " + taxonBase.getTitleCache() ); |
|
1951 |
return T_STATUS_UNRESOLVED; |
|
1952 |
}else{ |
|
1953 |
logger.warn("Unresolved taxon status, neither taxon nor synonym, this should not happen"); |
|
1954 |
return T_STATUS_UNRESOLVED; |
|
1955 |
} |
|
1956 |
//TODO |
|
1957 |
// public static int T_STATUS_ORPHANED = 6; //never used in SQL import |
|
1949 | 1958 |
} |
1950 |
//TODO |
|
1951 |
// public static int T_STATUS_UNRESOLVED = 5; |
|
1952 |
// public static int T_STATUS_ORPHANED = 6; |
|
1953 | 1959 |
} |
1954 | 1960 |
|
1955 |
// /** |
|
1956 |
// * |
|
1957 |
// * @param taxonBase |
|
1958 |
// * @return |
|
1959 |
// */ |
|
1960 |
// public static String taxonBase2statusCache (TaxonBase<?> taxonBase){ |
|
1961 |
// if (taxonBase == null){return null;} |
|
1962 |
// if (taxonBase.isInstanceOf(Taxon.class)){ |
|
1963 |
// Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class); |
|
1964 |
// if (taxon.getTaxonNodes().size() == 0){ |
|
1965 |
// return T_STATUS_STR_NOT_ACCEPTED; |
|
1966 |
// }else{ |
|
1967 |
// return T_STATUS_STR_ACCEPTED; |
|
1968 |
// } |
|
1969 |
// }else if (taxonBase.isInstanceOf(Synonym.class)){ |
|
1970 |
// return T_STATUS_STR_SYNONYM; |
|
1971 |
// }else{ |
|
1972 |
// logger.warn("Unknown "); |
|
1973 |
// return T_STATUS_STR_UNRESOLVED; |
|
1974 |
// } |
|
1975 |
// //TODO |
|
1976 |
// public static int T_STATUS_STR_PARTIAL_SYN = 3; |
|
1977 |
// public static int T_STATUS_STR_PRO_PARTE_SYN = 4; |
|
1978 |
// public static int T_STATUS_STR_UNRESOLVED = 5; |
|
1979 |
// public static int T_STATUS_STR_ORPHANED = 6; |
|
1980 |
// } |
|
1961 |
private static Integer taxonStatusByErmsStatus(Set<String> extensions) { |
|
1962 |
//Note: status extensions should only be used during ERMS import |
|
1963 |
// if the status can not be derived from ordinary CDM status handling (Taxon, Synonym, ProParte, doubtful, ...) |
|
1964 |
// see ErmsTaxonImport.handleNotAcceptedTaxonStatus |
|
1965 |
if (extensions.size()>1){ |
|
1966 |
logger.warn("More than 1 ERMS status available. This should not happen."); |
|
1967 |
} |
|
1968 |
String statusStr = extensions.iterator().next(); |
|
1969 |
Integer status = Integer.valueOf(statusStr); |
|
1970 |
return status; |
|
1971 |
} |
|
1981 | 1972 |
|
1982 |
/**
|
|
1973 |
/**
|
|
1983 | 1974 |
* Returns the {@link SourceCategory SourceCategory} representation of the given {@link ReferenceType ReferenceType} in PESI. |
1984 | 1975 |
* @param reference The {@link Reference Reference}. |
1985 | 1976 |
* @return The {@link SourceCategory SourceCategory} representation in PESI. |
Also available in: Unified diff
ref #1444, ref #8508, ref #8509 fix ERMS taxon status handling