Project

General

Profile

« Previous | Next » 

Revision f859201a

Added by Andreas Müller over 4 years ago

ref #8508 further improvements to ERMS validation

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/validate/PesiErmsValidator.java
32 32
    private static final Logger logger = Logger.getLogger(PesiErmsValidator.class);
33 33

  
34 34
    private static final Source defaultSource = PesiSources.PESI2019_ERMS_2019();
35
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
36
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2();
35
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
36
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2();
37 37

  
38 38
    private Source source = defaultSource;
39 39
    private Source destination = defaultDestination;
......
261 261
    private boolean testSingleTaxa(int n) throws SQLException {
262 262
        boolean success = true;
263 263
        ResultSet srcRS = source.getResultSet(""
264
                + " SELECT t.*, tu1.tu_name tu1_name, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
264
                + " SELECT t.*, pt.tu_name pt_name, pt.id pId, pt.tu_accfinal pAccId, "
265
                + "        acc.tu_sp as acc_sp, accP.id accPId, "
266
                + "        r.rank_name, st.status_name, "
265 267
                + "        type.tu_displayname typename, type.tu_authority typeauthor, "
266 268
                + "        fo.fossil_name, qs.qualitystatus_name "
267 269
                + " FROM tu t "
268
                + " LEFT JOIN tu as tu1 on t.tu_parent = tu1.id "
270
                + " LEFT JOIN tu as pt on t.tu_parent = pt.id "
269 271
                + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 30 AND rank_name = 'Phylum (Division)' OR rank_id = 40 AND rank_name = 'Subphylum (Subdivision)' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
270 272
                + " LEFT JOIN tu acc ON acc.id = t.tu_accfinal "
273
                + " LEFT JOIN tu accP ON acc.tu_parent = accP.id "
271 274
                + " LEFT JOIN status st ON st.status_id = t.tu_status "
272 275
                + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
273 276
                + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
......
276 279
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
277 280
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
278 281
                + "     pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
279
                + "     pt.treeIndex pTreeIndex, "
282
                + "     pt.treeIndex pTreeIndex, pt.IdInSource pIdInSource, "
280 283
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
281 284
                + " FROM Taxon t "
282 285
                + "    LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
......
314 317
        success &= compareNameParts(srcRS, destRS, id);
315 318

  
316 319
        success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
317
//TODO webShowName  success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
320
        //in ERMS displayName and webShowName should be equal as we do not have a correctly formatted nom. ref.
321
//        success &= equals("Taxon WebShowName", srcDisplayName(srcRS), destRS.getString("WebShowName"), id);
318 322
        success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
319 323
//        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
320
        success &= isNull("NomRefString", destRS);
324
        success &= isNull("NomRefString", destRS, id);
321 325
//        success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
322 326

  
323 327
//TODO nameStatusFk       success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
324 328
//TODO nameStatusCache    success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
325 329

  
326
       success &= equals("Taxon TaxonStatusFk", makePesiTaxonStatus(srcRS, "tu_status"),nullSafeInt( destRS,"TaxonStatusFk"), id);
327
/*//TODO taxonStatusCache  success &= equals("Taxon TaxonStatusCache", srcRS.getString("status_name"), destRS.getString("TaxonStatusCache"), id);
328
*/
329
        //TODO ParentTaxonFk
330
//      success &= equals("Taxon TaxonStatusFk", normalizeTaxonStatusFk(srcRS),nullSafeInt( destRS,"TaxonStatusFk"), id);
331
//      success &= equals("Taxon TaxonStatusCache", normalizeTaxonStatusCache(srcRS), destRS.getString("TaxonStatusCache"), id);
332

  
333
        success &= equals("Taxon ParentTaxonFk", srcParentTaxonFk(srcRS), destParentIdInSource(destRS), id);
330 334
        Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
331 335
        success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
332 336
//TODO  success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
333 337
        success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
334 338
        success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id);
335
        success &= testTreeIndex(destRS, ("TreeIndex"), ("pTreeIndex"), id);
339
        success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
336 340
        success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id));
337 341
        success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id);
338 342
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
339 343
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
340
        success &= isNull("ExpertGUID", destRS);  //only relevant after merge
341
        success &= isNull("ExpertName", destRS);  //only relevant after merge
342
        success &= isNull("SpeciesExpertGUID", destRS);  //only relevant after merge
344
        success &= isNull("ExpertGUID", destRS, id);  //only relevant after merge
345
        success &= isNull("ExpertName", destRS, id);  //only relevant after merge
346
        success &= isNull("SpeciesExpertGUID", destRS, id);  //only relevant after merge
343 347
        success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id);
344 348
        //LastAction(Date) handled in separate method
345
        success &= isNull("GUID2", destRS);  //only relevant after merge
346
        success &= isNull("DerivedFromGuid2", destRS);  //only relevant after merge
349
        success &= isNull("GUID2", destRS, id);  //only relevant after merge
350
        success &= isNull("DerivedFromGuid2", destRS, id);  //only relevant after merge
347 351
        return success;
348 352
    }
349 353

  
350
    private Integer makePesiTaxonStatus(ResultSet srcRS, String string) throws SQLException {
354
    private Integer destParentIdInSource(ResultSet destRS) throws SQLException {
355
        String parentIdInSource = destRS.getString("pIdInSource");
356
        if (parentIdInSource == null){
357
            return null;
358
        }else{
359
            String idStr = parentIdInSource.replace("tu_id:", "").trim();
360
            Integer result = Integer.valueOf(idStr);
361
            return result;
362
        }
363
    }
364

  
365
    private Integer srcParentTaxonFk(ResultSet srcRS) throws SQLException {
366
        Integer id = nullSafeInt(srcRS,"id");
367
        Integer accId = nullSafeInt(srcRS, "tu_accfinal");
368
        Integer pId = nullSafeInt(srcRS, "pId");
369
        Integer pAccId = nullSafeInt(srcRS, "pAccId");
370
        Integer accPId = nullSafeInt(srcRS, "accPId");  //parent of accepted taxon
371

  
372
        if (accId != null && !id.equals(accId)){
373
            if (id.equals(accPId)){
374
                return pId;    //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
375
            }else{
376
                return null;  //taxon is not accepted
377
            }
378
        }else{
379
            if (id == 1){
380
                return null;  //Biota
381
            }else if (pAccId == null){
382
                return pId;  //handle parent preliminary as accepted
383
            }else if (id.equals(pAccId)){
384
                return pId;  //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
385
            }else{
386
                return pAccId;
387
            }
388
        }
389
    }
390

  
391
    private Integer normalizeTaxonStatusFk(ResultSet srcRS) throws SQLException {
351 392
        Integer status = nullSafeInt(srcRS, "tu_status");
352
        if(status == 1 || status == 2){   //accepted, unaccepted
353
            return status;
354
        }else if (status == 3 || status == 6 || status == 7){  //nomen nudum, nomen dubium, temporary name
393
        if(status == 1 || status == 6 || status == 7 || status == 8 || status == 9 || status == 10 ){   //accepted, nomen dubium, temporary name, taxon inquirendum (status uncertain), interim unpublished, uncertain
394
            return 1;
395
        }else if (status == 2 || status == 3 || status == 5){  //unaccepted, nomen nudum, alternate representation
355 396
            return 2;
356 397
        }
398
        //4 does not exist and should not happen
357 399
        return -1;
358 400
    }
359 401

  
402
    private String normalizeTaxonStatusCache(ResultSet srcRS) throws SQLException {
403
        Integer status = normalizeTaxonStatusFk(srcRS);
404
        if (status == 1){
405
            return "accepted";
406
        }else if (status == 2){
407
            return "synonym";
408
        }else if (status == 4){
409
            return "pro parte synonym";
410
        }else if (status == 7){
411
            return "unaccepted";
412
        }else{
413
            return "xxx - not yet handled";
414
        }
415
    }
416

  
360 417
    private boolean testTreeIndex(ResultSet destRS, String childIndexAttr, String parentIndexAttr, String id) throws SQLException {
361 418
        boolean result;
362 419
        int taxonStatusFk = destRS.getInt("TaxonStatusFk");
363 420
        String parentTaxonId = destRS.getString("parentTaxonFk");
364 421
        int rankFk = destRS.getInt("RankFk");
365
        if (taxonStatusFk == 2 || taxonStatusFk == 4 || rankFk <= 10){  //synonym; pro parte syn; kingdom and higher
366
            result = isNull(childIndexAttr, destRS);
422
        if (taxonStatusFk == 2 || taxonStatusFk == 4 || taxonStatusFk == 7|| rankFk <= 10){  //synonym; pro parte syn; kingdom and higher
423
            result = isNull(childIndexAttr, destRS, id);
367 424
        }else{
368 425
            String childIndex = destRS.getString(childIndexAttr);
369 426
            String parentIndex = destRS.getString(parentIndexAttr);
......
387 444
        if (rankFk <= 180){
388 445
            genusOrUninomial = srcRS.getString("tu_name");
389 446
        }else if (rankFk == 190){
390
            genusOrUninomial = srcRS.getString("tu1_name");
447
            genusOrUninomial = srcRS.getString("pt_name");
391 448
            infraGenericEpithet =  srcRS.getString("tu_name");
392 449
            //TODO compareNameParts does not work this way
393 450
//        }else if (rankFk == 220){
......
540 597
        success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
541 598
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
542 599
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
543
        success &= isNull("notes", destRS);
600
        success &= isNull("notes", destRS, id);
544 601
        //complete if no further relations need to be added
545 602
        return success;
546 603
    }
......
618 675
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
619 676
        boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
620 677
        success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
621
        success &= isNull("Note_2", destRs);
678
        success &= isNull("Note_2", destRs, id);
622 679
        success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
623 680
        success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
624
        success &= isNull("Region", destRs);
625
        success &= isNull("SpeciesExpertGUID", destRs);
681
        success &= isNull("Region", destRs, id);
682
        success &= isNull("SpeciesExpertGUID", destRs, id);
626 683
        //SpeciesExpertName, LastAction, LastActionDate handled in separate method
627 684
        //complete
628 685
        return success;
......
675 732
        success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id);
676 733
        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
677 734
        //TODO see comments
678
        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
679
        success &= isNull("SourceCache", destRs);  //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
735
        success &= isNull("SourceFk", destRs, id);  //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
736
        success &= isNull("SourceCache", destRs, id);  //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
680 737
        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
681
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in ERMS
738
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in ERMS
682 739
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
683 740
        return success;
684 741
    }
......
722 779
        success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
723 780
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
724 781
        //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
725
        success &= isNull("Region", destRs);  //region does not seem to exist in ERMS
782
        success &= isNull("Region", destRs, id);  //region does not seem to exist in ERMS
726 783
        //TODO see comments
727 784
//        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table, check with PESI 2014
728 785
//        success &= isNull("SourceNameCache", destRs);  //sources should be moved to extra table, check with PESI 2014
729
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in ERMS
786
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in ERMS
730 787
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
731 788
        //complete
732 789
        return success;
......
777 834
        success &= equals("Reference title ", srcRS.getString("source_title"), destRS.getString("Title"), id);
778 835
        success &= equals("Reference author string ", srcRS.getString("source_author"), destRS.getString("AuthorString"), id);
779 836
        success &= equals("Reference year ", normalizeYear(srcRS.getString("source_year")), destRS.getString("RefYear"), id);
780
        success &= isNull("NomRefCache", destRS);  //for ERMS no other value was found in 2014 value
837
        success &= isNull("NomRefCache", destRS, id);  //for ERMS no other value was found in 2014 value
781 838
        success &= equals("Reference link ", srcRS.getString("source_link"), destRS.getString("Link"), id);
782 839
        success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
783 840
        //complete
......
836 893
        return yearStr;
837 894
    }
838 895

  
839
    private boolean isNull(String attrName, ResultSet destRS) throws SQLException {
896
    private boolean isNull(String attrName, ResultSet destRS, String id) throws SQLException {
840 897
        Object value = destRS.getObject(attrName);
841 898
        if (value != null){
842
            String message = attrName + " was expected to be null but was: " + value.toString();
899
            String message = attrName + " was expected to be null but was: " + value.toString() + "; id = " + id;
843 900
            logger.warn(message);
844 901
            return false;
845 902
        }else{
846
            logger.info(attrName + " was null as expected");
903
            logger.info(attrName + " was null as expected; id = " + id);
847 904
            return true;
848 905
        }
849 906
    }

Also available in: Unified diff