Project

General

Profile

« Previous | Next » 

Revision a0f9168d

Added by Andreas Müller over 4 years ago

ref #8508 latest changes to ERMS pipeline validation

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/validation/PesiErmsValidator.java
37 37
    private Source destination = defaultDestination;
38 38

  
39 39
    public void invoke(Source source, Source destination){
40
        logger.warn("Validate destination " +  destination.getDatabase());
40 41
        boolean success = true;
41 42
        try {
42 43
            this.source = source;
43 44
            this.destination = destination;
44
            success &= testReferences();
45
            success &= testTaxa();
46
            success &= testTaxonRelations();
47
            success &= testCommonNames();
45
//            success &= testReferences();
46
//            success &= testTaxa();
47
//            success &= testTaxonRelations();
48
//            success &= testCommonNames();
49
            success &= testDistributions();
48 50
        } catch (Exception e) {
49 51
            e.printStackTrace();
50 52
            success = false;
......
53 55
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
54 56
    }
55 57

  
58
    private boolean testDistributions() throws SQLException {
59
        boolean success = testDistributionCount();
60
        if (success){
61
              success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
62
        }
63
        return success;
64
    }
65

  
56 66
    private boolean testCommonNames() throws SQLException {
57 67
        boolean success = testCommonNameCount();
58 68
        if (success){
......
82 92
        return success;
83 93
    }
84 94

  
95
    private boolean testDistributionCount() {
96
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr ");
97
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
98
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
99
    }
100

  
85 101
    private boolean testCommonNameCount() {
86 102
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
87 103
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
......
89 105
    }
90 106

  
91 107
    private boolean testTaxaCount() {
92
         int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
108
         int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id NOT IN (147415)");
93 109
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
94 110
         return equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
95 111
     }
......
106 122
                + " LEFT OUTER JOIN tu type ON type.id = t.tu_typetaxon "
107 123
                + " LEFT OUTER JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
108 124
                + " LEFT OUTER JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
125
                + " WHERE t.id NOT IN (147415) "
109 126
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
110 127
        ResultSet destRS = destination.getResultSet("SELECT t.*, type.IdInSource typeSourceId "
111 128
                + " FROM Taxon t "
......
141 158
//        success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
142 159
        success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
143 160
        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
144
        //TODO NomRefString
145
        //TODO DisplayName
161
        success &= isNull("NomRefString", destRS);
162
        success &= equals("Taxon DisplayName", srcFullName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
163

  
146 164
//TODO        success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
147 165
//TODO        success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
148 166

  
......
220 238
        }
221 239
    }
222 240

  
241
    private boolean testSingleDistributions(int n) throws SQLException {
242
        boolean success = true;
243
        ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId, gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
244
                + " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
245
                + "    LEFT JOIN gu ON gu.id = dr.gu_id "
246
                + " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name ");  //, dr.note (not possible because ntext
247
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
248
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
249
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
250
                + " WHERE t.OriginalDB = 'erms' "
251
                + " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
252
        int count = 0;
253
        while (srcRs.next() && destRs.next()){
254
            success &= testSingleDistribution(srcRs, destRs);
255
            count++;
256
        }
257
        success &= equals("Distribution count differs", n, count, "-1");
258
        return success;
259
    }
260

  
261
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
262
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name"));
263
        boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
264
        success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id);
265
        success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id);
266
        success &= equals("Distribution area name ", srcRs.getString("note"), destRs.getString("Notes"), id);
267

  
268
        //TODO
269
        return success;
270
    }
271

  
223 272
    private boolean testSingleCommonNames(int n) throws SQLException {
224 273
        boolean success = true;
225
        ResultSet srcRs = source.getResultSet("SELECT v.*, l.LanName, tu.id tuId "
274
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
226 275
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
227
                + " ORDER BY CAST(tu.id as nvarchar(20)), tu.id ");
228
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2 "
229
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk INNER JOIN Language l ON l.LanguageId = cn.LanguageFk "
276
                + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername  ");
277
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
278
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
230 279
                + " WHERE t.OriginalDB = 'erms' "
231
                + " ORDER BY t.IdInSource, l.ISO639_2");
280
                + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName");
232 281
        int count = 0;
233 282
        while (srcRs.next() && destRs.next()){
234 283
            success &= testSingleCommonName(srcRs, destRs);
......
238 287
        return success;
239 288
    }
240 289

  
290
    boolean prefer639_3 = true;
291
    String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
292
    String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
293

  
241 294
    private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
242 295
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
243 296
        boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
244
        success &= equals("Common name languageID ", srcRs.getString("lan_id"), destRs.getString("ISO639_2"), id);
297
        success &= equals("Common name languageID ", srcRs.getString("iso"), getLanguageIso(destRs), id);
245 298
        success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
246 299
        //TODO success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
247
        success = equals("CommonName LanguageCache ", srcRs.getString("LanName"), destRs.getString("LanguageCache"), id);
300
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
248 301
        success &= isNull("Region", destRs);  //region does not seem to exist in ERMS
249 302

  
250 303
        //TODO
251 304
        return success;
252 305
    }
253 306

  
307
    private String normalizeLang(String string) {
308
        if ("Norwegian Nynorsk".equals(string)){
309
            return "Nynorsk (Norwegian)";
310
        }else if ("Norwegian Bokmål".equals(string)){
311
            return "Bokmål  (Norwegian)";
312
        }else if ("Spanish".equals(string)){
313
            return "Spanish, Castillian";
314
        }else if ("Modern Greek (1453-)".equals(string)){
315
            return "Greek";
316
        }else if ("Hebrew".equals(string)){
317
            return "Israel (Hebrew)";
318
        }else if ("Malay (individual language)".equals(string)){
319
            return "Malay";
320
        }
321

  
322
        return string;
323
    }
324

  
325
    private String getLanguageIso(ResultSet destRs) throws SQLException {
326
        String result = destRs.getString(preferredISO639);
327
        if (result == null){
328
            result = destRs.getString(alternativeISO639);
329
        }
330
        return result;
331
    }
332

  
254 333
    private boolean testSingleReferences() throws SQLException {
255 334
        boolean success = true;
256 335
        ResultSet srcRS = source.getResultSet("SELECT s.* FROM sources s ORDER BY s.id ");

Also available in: Unified diff