Revision f859201a
Added by Andreas Müller over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/validate/PesiErmsValidator.java | ||
---|---|---|
32 | 32 |
private static final Logger logger = Logger.getLogger(PesiErmsValidator.class); |
33 | 33 |
|
34 | 34 |
private static final Source defaultSource = PesiSources.PESI2019_ERMS_2019(); |
35 |
// private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
|
|
36 |
private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2(); |
|
35 |
private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI(); |
|
36 |
// private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2();
|
|
37 | 37 |
|
38 | 38 |
private Source source = defaultSource; |
39 | 39 |
private Source destination = defaultDestination; |
... | ... | |
261 | 261 |
private boolean testSingleTaxa(int n) throws SQLException { |
262 | 262 |
boolean success = true; |
263 | 263 |
ResultSet srcRS = source.getResultSet("" |
264 |
+ " SELECT t.*, tu1.tu_name tu1_name, r.rank_name, acc.tu_sp as acc_sp, st.status_name, " |
|
264 |
+ " SELECT t.*, pt.tu_name pt_name, pt.id pId, pt.tu_accfinal pAccId, " |
|
265 |
+ " acc.tu_sp as acc_sp, accP.id accPId, " |
|
266 |
+ " r.rank_name, st.status_name, " |
|
265 | 267 |
+ " type.tu_displayname typename, type.tu_authority typeauthor, " |
266 | 268 |
+ " fo.fossil_name, qs.qualitystatus_name " |
267 | 269 |
+ " FROM tu t " |
268 |
+ " LEFT JOIN tu as tu1 on t.tu_parent = tu1.id "
|
|
270 |
+ " LEFT JOIN tu as pt on t.tu_parent = pt.id "
|
|
269 | 271 |
+ " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 30 AND rank_name = 'Phylum (Division)' OR rank_id = 40 AND rank_name = 'Subphylum (Subdivision)' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id " |
270 | 272 |
+ " LEFT JOIN tu acc ON acc.id = t.tu_accfinal " |
273 |
+ " LEFT JOIN tu accP ON acc.tu_parent = accP.id " |
|
271 | 274 |
+ " LEFT JOIN status st ON st.status_id = t.tu_status " |
272 | 275 |
+ " LEFT JOIN tu type ON type.id = t.tu_typetaxon " |
273 | 276 |
+ " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id " |
... | ... | |
276 | 279 |
+ " ORDER BY CAST(t.id as nvarchar(20)) "); |
277 | 280 |
ResultSet destRS = destination.getResultSet("SELECT t.*, " |
278 | 281 |
+ " pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, " |
279 |
+ " pt.treeIndex pTreeIndex, " |
|
282 |
+ " pt.treeIndex pTreeIndex, pt.IdInSource pIdInSource, "
|
|
280 | 283 |
+ " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank " |
281 | 284 |
+ " FROM Taxon t " |
282 | 285 |
+ " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk " |
... | ... | |
314 | 317 |
success &= compareNameParts(srcRS, destRS, id); |
315 | 318 |
|
316 | 319 |
success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id); |
317 |
//TODO webShowName success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id); |
|
320 |
//in ERMS displayName and webShowName should be equal as we do not have a correctly formatted nom. ref. |
|
321 |
// success &= equals("Taxon WebShowName", srcDisplayName(srcRS), destRS.getString("WebShowName"), id); |
|
318 | 322 |
success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id); |
319 | 323 |
// success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id); |
320 |
success &= isNull("NomRefString", destRS); |
|
324 |
success &= isNull("NomRefString", destRS, id);
|
|
321 | 325 |
// success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached |
322 | 326 |
|
323 | 327 |
//TODO nameStatusFk success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id); |
324 | 328 |
//TODO nameStatusCache success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id); |
325 | 329 |
|
326 |
success &= equals("Taxon TaxonStatusFk", makePesiTaxonStatus(srcRS, "tu_status"),nullSafeInt( destRS,"TaxonStatusFk"), id);
|
|
327 |
/*//TODO taxonStatusCache success &= equals("Taxon TaxonStatusCache", srcRS.getString("status_name"), destRS.getString("TaxonStatusCache"), id);
|
|
328 |
*/ |
|
329 |
//TODO ParentTaxonFk
|
|
330 |
// success &= equals("Taxon TaxonStatusFk", normalizeTaxonStatusFk(srcRS),nullSafeInt( destRS,"TaxonStatusFk"), id);
|
|
331 |
// success &= equals("Taxon TaxonStatusCache", normalizeTaxonStatusCache(srcRS), destRS.getString("TaxonStatusCache"), id);
|
|
332 |
|
|
333 |
success &= equals("Taxon ParentTaxonFk", srcParentTaxonFk(srcRS), destParentIdInSource(destRS), id);
|
|
330 | 334 |
Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon"); |
331 | 335 |
success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id); |
332 | 336 |
//TODO success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id); |
333 | 337 |
success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id)); |
334 | 338 |
success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id); |
335 |
success &= testTreeIndex(destRS, ("TreeIndex"), ("pTreeIndex"), id);
|
|
339 |
success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
|
|
336 | 340 |
success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id)); |
337 | 341 |
success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id); |
338 | 342 |
success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id); |
339 | 343 |
success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases |
340 |
success &= isNull("ExpertGUID", destRS); //only relevant after merge |
|
341 |
success &= isNull("ExpertName", destRS); //only relevant after merge |
|
342 |
success &= isNull("SpeciesExpertGUID", destRS); //only relevant after merge |
|
344 |
success &= isNull("ExpertGUID", destRS, id); //only relevant after merge
|
|
345 |
success &= isNull("ExpertName", destRS, id); //only relevant after merge
|
|
346 |
success &= isNull("SpeciesExpertGUID", destRS, id); //only relevant after merge
|
|
343 | 347 |
success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id); |
344 | 348 |
//LastAction(Date) handled in separate method |
345 |
success &= isNull("GUID2", destRS); //only relevant after merge |
|
346 |
success &= isNull("DerivedFromGuid2", destRS); //only relevant after merge |
|
349 |
success &= isNull("GUID2", destRS, id); //only relevant after merge
|
|
350 |
success &= isNull("DerivedFromGuid2", destRS, id); //only relevant after merge
|
|
347 | 351 |
return success; |
348 | 352 |
} |
349 | 353 |
|
350 |
private Integer makePesiTaxonStatus(ResultSet srcRS, String string) throws SQLException { |
|
354 |
private Integer destParentIdInSource(ResultSet destRS) throws SQLException { |
|
355 |
String parentIdInSource = destRS.getString("pIdInSource"); |
|
356 |
if (parentIdInSource == null){ |
|
357 |
return null; |
|
358 |
}else{ |
|
359 |
String idStr = parentIdInSource.replace("tu_id:", "").trim(); |
|
360 |
Integer result = Integer.valueOf(idStr); |
|
361 |
return result; |
|
362 |
} |
|
363 |
} |
|
364 |
|
|
365 |
private Integer srcParentTaxonFk(ResultSet srcRS) throws SQLException { |
|
366 |
Integer id = nullSafeInt(srcRS,"id"); |
|
367 |
Integer accId = nullSafeInt(srcRS, "tu_accfinal"); |
|
368 |
Integer pId = nullSafeInt(srcRS, "pId"); |
|
369 |
Integer pAccId = nullSafeInt(srcRS, "pAccId"); |
|
370 |
Integer accPId = nullSafeInt(srcRS, "accPId"); //parent of accepted taxon |
|
371 |
|
|
372 |
if (accId != null && !id.equals(accId)){ |
|
373 |
if (id.equals(accPId)){ |
|
374 |
return pId; //exceptional handling to avoid recursion mostly for some autonyms and alternate representations |
|
375 |
}else{ |
|
376 |
return null; //taxon is not accepted |
|
377 |
} |
|
378 |
}else{ |
|
379 |
if (id == 1){ |
|
380 |
return null; //Biota |
|
381 |
}else if (pAccId == null){ |
|
382 |
return pId; //handle parent preliminary as accepted |
|
383 |
}else if (id.equals(pAccId)){ |
|
384 |
return pId; //exceptional handling to avoid recursion mostly for some autonyms and alternate representations |
|
385 |
}else{ |
|
386 |
return pAccId; |
|
387 |
} |
|
388 |
} |
|
389 |
} |
|
390 |
|
|
391 |
private Integer normalizeTaxonStatusFk(ResultSet srcRS) throws SQLException { |
|
351 | 392 |
Integer status = nullSafeInt(srcRS, "tu_status"); |
352 |
if(status == 1 || status == 2){ //accepted, unaccepted
|
|
353 |
return status;
|
|
354 |
}else if (status == 3 || status == 6 || status == 7){ //nomen nudum, nomen dubium, temporary name
|
|
393 |
if(status == 1 || status == 6 || status == 7 || status == 8 || status == 9 || status == 10 ){ //accepted, nomen dubium, temporary name, taxon inquirendum (status uncertain), interim unpublished, uncertain
|
|
394 |
return 1;
|
|
395 |
}else if (status == 2 || status == 3 || status == 5){ //unaccepted, nomen nudum, alternate representation
|
|
355 | 396 |
return 2; |
356 | 397 |
} |
398 |
//4 does not exist and should not happen |
|
357 | 399 |
return -1; |
358 | 400 |
} |
359 | 401 |
|
402 |
private String normalizeTaxonStatusCache(ResultSet srcRS) throws SQLException { |
|
403 |
Integer status = normalizeTaxonStatusFk(srcRS); |
|
404 |
if (status == 1){ |
|
405 |
return "accepted"; |
|
406 |
}else if (status == 2){ |
|
407 |
return "synonym"; |
|
408 |
}else if (status == 4){ |
|
409 |
return "pro parte synonym"; |
|
410 |
}else if (status == 7){ |
|
411 |
return "unaccepted"; |
|
412 |
}else{ |
|
413 |
return "xxx - not yet handled"; |
|
414 |
} |
|
415 |
} |
|
416 |
|
|
360 | 417 |
private boolean testTreeIndex(ResultSet destRS, String childIndexAttr, String parentIndexAttr, String id) throws SQLException { |
361 | 418 |
boolean result; |
362 | 419 |
int taxonStatusFk = destRS.getInt("TaxonStatusFk"); |
363 | 420 |
String parentTaxonId = destRS.getString("parentTaxonFk"); |
364 | 421 |
int rankFk = destRS.getInt("RankFk"); |
365 |
if (taxonStatusFk == 2 || taxonStatusFk == 4 || rankFk <= 10){ //synonym; pro parte syn; kingdom and higher |
|
366 |
result = isNull(childIndexAttr, destRS); |
|
422 |
if (taxonStatusFk == 2 || taxonStatusFk == 4 || taxonStatusFk == 7|| rankFk <= 10){ //synonym; pro parte syn; kingdom and higher
|
|
423 |
result = isNull(childIndexAttr, destRS, id);
|
|
367 | 424 |
}else{ |
368 | 425 |
String childIndex = destRS.getString(childIndexAttr); |
369 | 426 |
String parentIndex = destRS.getString(parentIndexAttr); |
... | ... | |
387 | 444 |
if (rankFk <= 180){ |
388 | 445 |
genusOrUninomial = srcRS.getString("tu_name"); |
389 | 446 |
}else if (rankFk == 190){ |
390 |
genusOrUninomial = srcRS.getString("tu1_name");
|
|
447 |
genusOrUninomial = srcRS.getString("pt_name");
|
|
391 | 448 |
infraGenericEpithet = srcRS.getString("tu_name"); |
392 | 449 |
//TODO compareNameParts does not work this way |
393 | 450 |
// }else if (rankFk == 220){ |
... | ... | |
540 | 597 |
success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id); |
541 | 598 |
success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id); |
542 | 599 |
success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id); |
543 |
success &= isNull("notes", destRS); |
|
600 |
success &= isNull("notes", destRS, id);
|
|
544 | 601 |
//complete if no further relations need to be added |
545 | 602 |
return success; |
546 | 603 |
} |
... | ... | |
618 | 675 |
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type")); |
619 | 676 |
boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id); |
620 | 677 |
success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id); |
621 |
success &= isNull("Note_2", destRs); |
|
678 |
success &= isNull("Note_2", destRs, id);
|
|
622 | 679 |
success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id); |
623 | 680 |
success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id); |
624 |
success &= isNull("Region", destRs); |
|
625 |
success &= isNull("SpeciesExpertGUID", destRs); |
|
681 |
success &= isNull("Region", destRs, id);
|
|
682 |
success &= isNull("SpeciesExpertGUID", destRs, id);
|
|
626 | 683 |
//SpeciesExpertName, LastAction, LastActionDate handled in separate method |
627 | 684 |
//complete |
628 | 685 |
return success; |
... | ... | |
675 | 732 |
success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id); |
676 | 733 |
success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id); |
677 | 734 |
//TODO see comments |
678 |
success &= isNull("SourceFk", destRs); //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS) |
|
679 |
success &= isNull("SourceCache", destRs); //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS) |
|
735 |
success &= isNull("SourceFk", destRs, id); //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
|
|
736 |
success &= isNull("SourceCache", destRs, id); //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
|
|
680 | 737 |
success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id); |
681 |
success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in ERMS |
|
738 |
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in ERMS
|
|
682 | 739 |
//SpeciesExpertName,LastAction,LastActionDate handled in separate method |
683 | 740 |
return success; |
684 | 741 |
} |
... | ... | |
722 | 779 |
success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id); |
723 | 780 |
success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id); |
724 | 781 |
//TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id); |
725 |
success &= isNull("Region", destRs); //region does not seem to exist in ERMS |
|
782 |
success &= isNull("Region", destRs, id); //region does not seem to exist in ERMS
|
|
726 | 783 |
//TODO see comments |
727 | 784 |
// success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014 |
728 | 785 |
// success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014 |
729 |
success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in ERMS |
|
786 |
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in ERMS
|
|
730 | 787 |
//SpeciesExpertName,LastAction,LastActionDate handled in separate method |
731 | 788 |
//complete |
732 | 789 |
return success; |
... | ... | |
777 | 834 |
success &= equals("Reference title ", srcRS.getString("source_title"), destRS.getString("Title"), id); |
778 | 835 |
success &= equals("Reference author string ", srcRS.getString("source_author"), destRS.getString("AuthorString"), id); |
779 | 836 |
success &= equals("Reference year ", normalizeYear(srcRS.getString("source_year")), destRS.getString("RefYear"), id); |
780 |
success &= isNull("NomRefCache", destRS); //for ERMS no other value was found in 2014 value |
|
837 |
success &= isNull("NomRefCache", destRS, id); //for ERMS no other value was found in 2014 value
|
|
781 | 838 |
success &= equals("Reference link ", srcRS.getString("source_link"), destRS.getString("Link"), id); |
782 | 839 |
success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id); |
783 | 840 |
//complete |
... | ... | |
836 | 893 |
return yearStr; |
837 | 894 |
} |
838 | 895 |
|
839 |
private boolean isNull(String attrName, ResultSet destRS) throws SQLException { |
|
896 |
private boolean isNull(String attrName, ResultSet destRS, String id) throws SQLException {
|
|
840 | 897 |
Object value = destRS.getObject(attrName); |
841 | 898 |
if (value != null){ |
842 |
String message = attrName + " was expected to be null but was: " + value.toString(); |
|
899 |
String message = attrName + " was expected to be null but was: " + value.toString() + "; id = " + id;
|
|
843 | 900 |
logger.warn(message); |
844 | 901 |
return false; |
845 | 902 |
}else{ |
846 |
logger.info(attrName + " was null as expected");
|
|
903 |
logger.info(attrName + " was null as expected; id = " + id);
|
|
847 | 904 |
return true; |
848 | 905 |
} |
849 | 906 |
} |
Also available in: Unified diff
ref #8508 further improvements to ERMS validation