Project

General

Profile

« Previous | Next » 

Revision 164fc327

Added by Andreas Müller over 4 years ago

ref #8508 latest changes to PesiErmsValidator

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/validation/PesiErmsValidator.java
19 19
import eu.etaxonomy.cdm.app.pesi.PesiSources;
20 20
import eu.etaxonomy.cdm.common.CdmUtils;
21 21
import eu.etaxonomy.cdm.io.common.Source;
22
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
22 23

  
23 24
/**
24 25
 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
......
43 44
            this.source = source;
44 45
            this.destination = destination;
45 46
//            success &= testReferences();
46
//            success &= testTaxa();
47
//            success &= testTaxonRelations();
47
            success &= testTaxa();
48
            success &= testTaxonRelations();
48 49
//            success &= testCommonNames();
49 50
//            success &= testDistributions();
50
            success &= testNotes();
51
//            success &= testNotes();
52
//            success &= testAdditionalTaxonSources();
51 53
        } catch (Exception e) {
52 54
            e.printStackTrace();
53 55
            success = false;
......
56 58
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
57 59
    }
58 60

  
61
    private boolean testAdditionalTaxonSources() throws SQLException {
62
        System.out.println("Start validate additional taxon sources");
63
        boolean success = testAdditionalTaxonSourcesCount();
64
        if (success){
65
              success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
66
        }
67
        return success;
68
    }
69

  
59 70
    private boolean testNotes() throws SQLException {
71
        System.out.println("Start validate notes");
60 72
        boolean success = testNotesCount();
61
        if (!success){
73
        if (success){
62 74
              success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
63 75
        }
64 76
        return success;
65 77
    }
66 78

  
67 79
    private boolean testDistributions() throws SQLException {
80
        System.out.println("Start validate distributions");
68 81
        boolean success = testDistributionCount();
69 82
        if (success){
70 83
              success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
......
73 86
    }
74 87

  
75 88
    private boolean testCommonNames() throws SQLException {
89
        System.out.println("Start validate common names");
76 90
        boolean success = testCommonNameCount();
77 91
        if (success){
78 92
            success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
......
80 94
        return success;
81 95
    }
82 96

  
83
    private boolean testTaxonRelations() {
84
        boolean success = true;
97
    private boolean testTaxonRelations() throws SQLException {
98
        System.out.println("Start validate taxon relations");
99
        boolean success = testTaxonRelationCount();
100
        if (success){
101
            success &= testSingleTaxonRelations(source.getUniqueInteger(countTaxonRelation));
102
        }
85 103
        return success;
86 104
    }
87 105

  
88 106
    private boolean testTaxa() throws SQLException {
89
            boolean success = testTaxaCount();
90
            if (success){
91
                success &= testSingleTaxa(source.getUniqueInteger("SELECT count(*) FROM tu "));
92
            }
93
            return success;
107
        System.out.println("Start validate taxa");
108
        boolean success = testTaxaCount();
109
        if (success){
110
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
111
        }
112
        return success;
94 113
    }
95 114

  
96 115
    private boolean testReferences() throws SQLException {
116
        System.out.println("Start validate references");
97 117
        boolean success = testReferenceCount();
98 118
        if (success){
99 119
            success &= testSingleReferences();
......
101 121
        return success;
102 122
    }
103 123

  
124
    private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id <>  147415 ";
125
    private boolean testAdditionalTaxonSourcesCount() {
126
        int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
127
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
128
        return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
129
    }
130

  
104 131
    private boolean testNotesCount() {
105 132
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
106 133
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note ");
......
119 146
        return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
120 147
    }
121 148

  
149
    private final String countTaxonRelation = "SELECT count(*) FROM tu WHERE  tu_acctaxon <> id AND id NOT IN (147415) ";
150
    private boolean testTaxonRelationCount() {
151
         int countSrc = source.getUniqueInteger(countTaxonRelation);
152
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon ");
153
         return equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
154
     }
155

  
156

  
157
    private final String countTaxon = "SELECT count(*) FROM tu WHERE id NOT IN (147415)";
122 158
    private boolean testTaxaCount() {
123
         int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id NOT IN (147415)");
159
         int countSrc = source.getUniqueInteger(countTaxon);
124 160
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
125 161
         return equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
126 162
     }
......
128 164
    private boolean testSingleTaxa(int n) throws SQLException {
129 165
        boolean success = true;
130 166
        ResultSet srcRS = source.getResultSet(""
131
                + " SELECT t.*, acc.tu_sp as acc_sp, st.status_name, "
167
                + " SELECT t.*, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
132 168
                + "        type.tu_displayname typename, type.tu_authority typeauthor, "
133 169
                + "        fo.fossil_name, qs.qualitystatus_name "
134 170
                + " FROM tu t "
135
                + " LEFT OUTER JOIN tu acc ON acc.id = t.tu_acctaxon "
136
                + " LEFT OUTER JOIN status st ON st.status_id = t.tu_status "
137
                + " LEFT OUTER JOIN tu type ON type.id = t.tu_typetaxon "
138
                + " LEFT OUTER JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
139
                + " LEFT OUTER JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
171
                + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 40 AND rank_name = 'Subphylum' OR rank_id = 122 AND rank_name='Parvorder')) as r ON t.tu_rank = r.rank_id "
172
                + " LEFT JOIN tu acc ON acc.id = t.tu_acctaxon "
173
                + " LEFT JOIN status st ON st.status_id = t.tu_status "
174
                + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
175
                + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
176
                + " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
140 177
                + " WHERE t.id NOT IN (147415) "
141 178
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
142
        ResultSet destRS = destination.getResultSet("SELECT t.*, type.IdInSource typeSourceId "
179
        ResultSet destRS = destination.getResultSet("SELECT t.*, s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
143 180
                + " FROM Taxon t "
144
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk"
181
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
182
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
183
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
145 184
                + " WHERE t.OriginalDB = 'erms' "
146 185
                + " ORDER BY t.IdInSource");
147 186
        ResultSet srcRsLastAction = source.getResultSet(""
......
165 204
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
166 205
        String id = String.valueOf(srcRS.getInt("id"));
167 206
        boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id);
168
        //TODO SourceFk
207
        success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
169 208
//      success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
170
        //TODO RankFk, RankCache
209
        success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id);
210
//      success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
171 211
        //TODO GenusOrUninomial, InfraGenericEpithet, SpecificEpithet, InfraSpecificEpithet
172 212
//      success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
173 213
//        success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
174 214
        success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
175
        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
215
//        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
176 216
        success &= isNull("NomRefString", destRS);
177
        success &= equals("Taxon DisplayName", srcFullName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
217
//        success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
178 218

  
179 219
//TODO        success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
180 220
//TODO        success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
......
205 245
        return success;
206 246
    }
207 247

  
248
    /**
249
     * @param string
250
     * @return
251
     */
252
    private String normalizeRank(String string) {
253
        String result = string.replace("Subforma", "Subform")
254
                .replace("Forma", "Form")
255
// --               .replace("Subdivision", "Subphylum")
256
         ;
257
        return result;
258
    }
259

  
208 260
    private String srcFullName(ResultSet srcRs) throws SQLException {
209 261
        String result = null;
210 262
        String epi = srcRs.getString("tu_name");
......
219 271
        return result;
220 272
    }
221 273

  
274
    private String srcDisplayName(ResultSet srcRs) throws SQLException {
275
        String result = null;
276
        String epi = srcRs.getString("tu_name");
277
        epi = " a" + epi;
278
        String display = "<i>"+srcRs.getString("tu_displayname")+"</i>";
279
        display = display.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>");
280
        String sp = srcRs.getString("tu_sp");
281
        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
282
            result = display.replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
283
        }else{
284
            result = CdmUtils.concat(" ", display, srcRs.getString("tu_authority"));
285
        }
286
        return result;
287
    }
288

  
222 289
    private boolean testTaxonLastAction(ResultSet srcRs, ResultSet destRs, String id) throws SQLException {
223 290
        boolean success = true;
224 291
        while (srcRs.next()){
......
253 320
        }
254 321
    }
255 322

  
323
    private boolean testSingleTaxonRelations(int n) throws SQLException {
324
        boolean success = true;
325
        ResultSet srcRS = source.getResultSet(""
326
                + " SELECT t.* "
327
                + " FROM tu t "
328
                + " WHERE t.id NOT IN (147415) AND tu_acctaxon <> id "
329
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
330
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
331
                + " FROM RelTaxon rel "
332
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
333
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
334
                + " WHERE t1.OriginalDB = 'erms' AND t2.OriginalDB = 'erms' "
335
                + " ORDER BY t1.IdInSource");
336
        int i = 0;
337
        while (srcRS.next() && destRS.next()){
338
            success &= testSingleTaxonRelation(srcRS, destRS);
339
            i++;
340
        }
341
        success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
342
        return success;
343
    }
344

  
345
    private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
346
        String id = String.valueOf(srcRS.getInt("id"));
347
        boolean success = equals("Taxon relation taxon1", "tu_id: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
348
        success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_acctaxon"), destRS.getString("t2Id"), id);
349
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
350
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
351
        //TODO enable after next import
352
//        success &= isNull("notes", destRS);
353
        //complete if no further relations need to added
354
        return success;
355
    }
356

  
357

  
358
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
359
        boolean success = true;
360
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
361
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
362
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
363
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
364
                + " WHERE MN.tu_id NOT IN (147415)  "
365
                + " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
366
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
367
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
368
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
369
                + "    LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
370
                + " WHERE t.OriginalDB = 'erms' "
371
                + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
372
        int count = 0;
373
        while (srcRs.next() && destRs.next()){
374
            success &= testSingleAdditionalTaxonSource(srcRs, destRs);
375
            count++;
376
        }
377
        success &= equals("Notes count differs", n, count, "-1");
378
        return success;
379
    }
380

  
381
    private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
382
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
383
        boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
384
        success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id);  //currently we use the same id in ERMS and PESI
385
        success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
386
        success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
387
        //TODO some records are still truncated ~ >820 characters
388
        //success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
389
        success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
390
        //Complete
391
        return success;
392
    }
256 393

  
257 394
    private boolean testSingleNotes(int n) throws SQLException {
258 395
        boolean success = true;
259
        ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId, no.*, l.LanName "
396
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
260 397
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
261 398
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
262 399
                + " ORDER BY CAST(tu.id as nvarchar(20)), no.type ");  //, no.note (not possible because ntext

Also available in: Unified diff