Project

General

Profile

Download (52.6 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.pesi.validate;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.sql.Timestamp;
14

    
15
import org.apache.commons.lang.StringUtils;
16
import org.apache.log4j.Logger;
17

    
18
import eu.etaxonomy.cdm.app.common.PesiDestinations;
19
import eu.etaxonomy.cdm.app.common.PesiSources;
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.io.common.Source;
22
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
23

    
24
/**
25
 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
26
 *
27
 * @author a.mueller
28
 * @since 01.09.2019
29
 */
30
public class PesiErmsValidator {
31

    
32
    private static final Logger logger = Logger.getLogger(PesiErmsValidator.class);
33

    
34
    private static final Source defaultSource = PesiSources.PESI2019_ERMS_2019();
35
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
36
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2();
37

    
38
    private Source source = defaultSource;
39
    private Source destination = defaultDestination;
40
    private String moneraFilter = " NOT IN (-1)"; // 147415;
41
//    private String moneraFilter = " NOT IN (147415)"; // 147415;
42

    
43
    private String origErms = "OriginalDB = 'ERMS' ";
44

    
45
    public void invoke(Source source, Source destination){
46
        logger.warn("Validate destination " +  destination.getDatabase());
47
        boolean success = true;
48
        try {
49
            this.source = source;
50
            this.destination = destination;
51
//            success &= testReferences();  //ready, few minor issues to be discussed with VLIZ
52
            success &= testTaxa();
53
//            success &= testTaxonRelations();  //name relations count!,  single record compare tests for synonyms and included in
54
//            success &= testCommonNames();  //source(s) discuss VLIZ, exact duplicates (except for sources), Anus(Korur)
55
//            success &= testDistributions();  //>1000 duplicates in "dr", sources (OccurrenceSource table), 1 long note
56
//            success &= testNotes();  //ecology & link notes test (only count tested), sources untested (NoteSource table)
57
//            success &= testAdditionalTaxonSources();  //ready
58
        } catch (Exception e) {
59
            e.printStackTrace();
60
            success = false;
61
        }
62
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
63
    }
64

    
65
    private boolean testAdditionalTaxonSources() throws SQLException {
66
        System.out.println("Start validate additional taxon sources");
67
        boolean success = testAdditionalTaxonSourcesCount();
68
        if (success){
69
              success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
70
        }
71
        return success;
72
    }
73

    
74
    private boolean testNotes() throws SQLException {
75
        System.out.println("Start validate notes");
76
        boolean success = testNotesCount();
77
        if (success){
78
              success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
79
        }
80
        return success;
81
    }
82

    
83
    private boolean testDistributions() throws SQLException {
84
        System.out.println("Start validate distributions");
85
        boolean success = testDistributionCount();
86
        if (success){
87
              success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
88
        }
89
        return success;
90
    }
91

    
92
    private boolean testCommonNames() throws SQLException {
93
        System.out.println("Start validate common names");
94
        boolean success = testCommonNameCount();
95
        if (success){
96
            success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
97
        }
98
        return success;
99
    }
100

    
101
    int countSynonyms;
102
    int countIncludedIns;
103
    private boolean testTaxonRelations() throws SQLException {
104
        System.out.println("Start validate taxon relations");
105
        boolean success = testSynonymRelations();  //only count, single record test still missing
106
        success &= testIncludedInRelations();  //only count, single record test still missing
107
        success &= testTotalRelations();
108
        success &= testNameRelations();
109
        return success;
110
    }
111

    
112
    private boolean testTotalRelations() {
113
        if (!(countSynonyms < 0 || countIncludedIns < 0)){
114
            int countTotalSrc = countSynonyms + countIncludedIns;
115
            int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
116
            boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
117
            int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origErms);
118
            success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
119
            return success;
120
        }else{
121
            return false;
122
        }
123
    }
124

    
125
    private boolean testSynonymRelations() throws SQLException {
126

    
127
        int countSrc = source.getUniqueInteger(countSynonymRelation);
128
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
129
        boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
130
//         update Match_RelStat set RelTaxon  =  102 where tu_unacceptreason like 'currently placed%'
131
//                 update Match_RelStat set RelTaxon   =  102 where tu_unacceptreason like 'currently held%'
132
//                 update Match_RelStat set RelTaxon   =  102 where tu_unacceptreason like 'sy%' or tu_unacceptreason like '%jun%syn%'
133
//                 update Match_RelStat set RelTaxon   =  102 where tu_unacceptreason = '(synonym)'
134
//                 update Match_RelStat set RelTaxon   =  102 where tu_unacceptreason = 'reverted genus transfer'
135
//                 update Match_RelStat set RelTaxon   =  103 where tu_unacceptreason like 'misapplied%'
136
//                 update Match_RelStat set RelTaxon   =  104 where tu_unacceptreason like 'part% synonym%'
137
//                 update Match_RelStat set RelTaxon   =  106 where tu_unacceptreason = 'heterotypic synonym' or tu_unacceptreason = 'subjective synonym'
138
//                 update Match_RelStat set RelTaxon   =  107 where tu_unacceptreason like '%homot%syn%' or tu_unacceptreason = 'objective synonym' synyonym
139
//                 update Match_RelStat set RelTaxon   =  107 where tu_unacceptreason like '%bas[iy][no]%ny%'
140
        if (success){
141
            //TODO test single synonym relations
142
//            success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
143
        }
144
        countSynonyms = (countSrc == countDest)? countSrc : -1;
145
        return success;
146
    }
147

    
148
    private boolean testNameRelations() {
149
        //Name relations
150
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
151
               + " tu_unacceptreason like '%bas[iy][no]%ny%' OR tu_unacceptreason = 'original combination' "
152
               + " OR tu_unacceptreason = 'Subsequent combination' OR tu_unacceptreason like '%genus transfer%'  "
153
               + " OR tu_unacceptreason = 'genus change' "  //1
154
               + " OR tu_unacceptreason like '%homon%' "   // 2
155
               + " OR tu_unacceptreason like '%spell%' OR tu_unacceptreason like 'lapsus %' " //16
156

    
157
                 + ")");
158
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
159
        boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
160
        if (success){
161
            //TODO test single name relation
162
//            success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
163
        }
164
        return success;
165
    }
166

    
167
    private boolean testIncludedInRelations() {
168
        int countSrc = source.getUniqueInteger(countParentRelation);
169
        int  countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
170
        boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
171
        if (success){
172
            //TODO test single includedIn relations
173
//            success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
174
        }
175
        countIncludedIns = (countSrc == countDest)? countSrc : -1;
176
        return success;
177
    }
178

    
179
    private boolean testTaxa() throws SQLException {
180
        System.out.println("Start validate taxa");
181
        boolean success = testTaxaCount();
182
        //FIXME
183
        if (!success){
184
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
185
        }
186
        return success;
187
    }
188

    
189
    private boolean testReferences() throws SQLException {
190
        System.out.println("Start validate references");
191
        boolean success = testReferenceCount();
192
        if (success){
193
            success &= testSingleReferences();
194
        }
195
        return success;
196
    }
197

    
198
    private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id " + moneraFilter;
199
    private boolean testAdditionalTaxonSourcesCount() {
200
        int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
201
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
202
        return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
203
    }
204

    
205
    private boolean testNotesCount() {
206
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
207
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
208
                + " WHERE NOT (NoteCategoryFk = 4 AND LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) ");
209
        boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
210

    
211
        countSrc = source.getUniqueInteger("SELECT count(*) FROM tu "
212
                + " WHERE (tu_marine IS NOT NULL OR tu_brackish IS NOT NULL OR tu_fresh IS NOT NULL OR tu_terrestrial IS NOT NULL) "
213
                + "     AND tu.id " + moneraFilter );
214
        countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
215
                + " WHERE (NoteCategoryFk = 4 AND LastAction IS NULL) ");
216
        result &= equals("Notes ecology count ", countSrc, countDest, String.valueOf(-1));
217

    
218
        countSrc = source.getUniqueInteger("SELECT count(*) FROM links ");
219
        countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
220
                + " WHERE NoteCategoryFk IN (22,23,24) ");
221
        result &= equals("Notes link count ", countSrc, countDest, String.valueOf(-1));
222

    
223
        return result;
224
    }
225

    
226
    private boolean testDistributionCount() {
227
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr ");
228
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
229
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
230
    }
231

    
232
    private boolean testCommonNameCount() {
233
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
234
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
235
        return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
236
    }
237

    
238
    private final String countSynonymRelation = "SELECT count(*) FROM tu syn LEFT JOIN tu acc ON syn.tu_accfinal = acc.id WHERE (syn.id <> acc.id AND syn.tu_accfinal IS NOT NULL AND syn.id <> acc.tu_parent) AND syn.id " + moneraFilter;
239
    private final String countParentRelation  = "SELECT count(*)-1 FROM tu syn LEFT JOIN tu acc ON syn.tu_accfinal = acc.id WHERE (syn.id =  acc.id OR  syn.tu_accfinal IS  NULL OR  syn.id =  acc.tu_parent) AND syn.id " + moneraFilter;
240

    
241
    private final String countTaxon = "SELECT count(*) FROM tu WHERE id " + moneraFilter;
242
    private boolean testTaxaCount() {
243
         int countSrc = source.getUniqueInteger(countTaxon);
244
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
245
         boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
246

    
247
         //NomStatus
248
         countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
249
               + " tu_unacceptreason like '%inval%' OR  tu_unacceptreason like '%not val%' "
250
               + " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
251
               + " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
252
               + " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
253
               + " OR tu_unacceptreason = 'new combination'  "
254
               + " OR tu_status IN (3,5,6,7,8) )");
255
         countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
256
         result = equals("Taxon name status count ", countSrc, countDest, String.valueOf(-1));
257

    
258
         return result;
259
     }
260

    
261
    private boolean testSingleTaxa(int n) throws SQLException {
262
        boolean success = true;
263
        ResultSet srcRS = source.getResultSet(""
264
                + " SELECT t.*, pt.tu_name pt_name, pt.id pId, pt.tu_accfinal pAccId, "
265
                + "        acc.tu_sp as acc_sp, accP.id accPId, "
266
                + "        r.rank_name, st.status_name, "
267
                + "        type.tu_displayname typename, type.tu_authority typeauthor, "
268
                + "        fo.fossil_name, qs.qualitystatus_name "
269
                + " FROM tu t "
270
                + " LEFT JOIN tu as pt on t.tu_parent = pt.id "
271
                + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 30 AND rank_name = 'Phylum (Division)' OR rank_id = 40 AND rank_name = 'Subphylum (Subdivision)' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
272
                + " LEFT JOIN tu acc ON acc.id = t.tu_accfinal "
273
                + " LEFT JOIN tu accP ON acc.tu_parent = accP.id "
274
                + " LEFT JOIN status st ON st.status_id = t.tu_status "
275
                + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
276
                + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
277
                + " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
278
                + " WHERE t.id " + moneraFilter
279
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
280
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
281
                + "     pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
282
                + "     pt.treeIndex pTreeIndex, pt.IdInSource pIdInSource, "
283
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
284
                + " FROM Taxon t "
285
                + "    LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
286
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
287
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
288
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
289
                + " WHERE t."+ origErms
290
                + " ORDER BY t.IdInSource");
291
        ResultSet srcRsLastAction = source.getResultSet(""
292
                + " SELECT t.id, s.sessiondate, a.action_name, s.ExpertName "
293
                + " FROM tu t "
294
                + "   LEFT OUTER JOIN tu_sessions MN ON t.id = MN.tu_id "
295
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
296
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
297
                + " ORDER BY CAST(t.id as nvarchar(20)), s.sessiondate DESC, a.id DESC ");
298
        int i = 0;
299
        while (srcRS.next() && destRS.next()){
300
            success &= testSingleTaxon(srcRS, destRS);
301
            success &= testLastAction(srcRsLastAction, destRS, String.valueOf(srcRS.getInt("id")), "Taxon");
302
            i++;
303
        }
304
        success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
305
        return success;
306
    }
307

    
308
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
309
        String id = String.valueOf(srcRS.getInt("id"));
310
        //complete
311
        boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id);
312
        success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
313

    
314
        success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
315
        success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id);
316
        success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name"), srcRS, id), destRS.getString("Rank"), id);
317
        success &= compareNameParts(srcRS, destRS, id);
318

    
319
        success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
320
        //in ERMS displayName and webShowName should be equal as we do not have a correctly formatted nom. ref.
321
//        success &= equals("Taxon WebShowName", srcDisplayName(srcRS), destRS.getString("WebShowName"), id);
322
        success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
323
//        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
324
        success &= isNull("NomRefString", destRS, id);
325
//        success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
326

    
327
//TODO nameStatusFk       success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
328
//TODO nameStatusCache    success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
329

    
330
        success &= equals("Taxon TaxonStatusFk", normalizeTaxonStatusFk(srcRS),nullSafeInt( destRS,"TaxonStatusFk"), id);
331
        success &= equals("Taxon TaxonStatusCache", normalizeTaxonStatusCache(srcRS), destRS.getString("TaxonStatusCache"), id);
332

    
333
        success &= equals("Taxon ParentTaxonFk", srcParentTaxonFk(srcRS), destParentIdInSource(destRS), id);
334
        Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
335
        success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
336
//TODO  success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
337
        success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
338
        success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id);
339
        //the >200 taxa having themselves as grandparents are currently still reported as errors (what they are but they are handled during im/export
340
//        success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
341
        success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id));
342
        success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id);
343
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
344
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
345
        success &= isNull("ExpertGUID", destRS, id);  //only relevant after merge
346
        success &= isNull("ExpertName", destRS, id);  //only relevant after merge
347
        success &= isNull("SpeciesExpertGUID", destRS, id);  //only relevant after merge
348
        success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id);
349
        //LastAction(Date) handled in separate method
350
        success &= isNull("GUID2", destRS, id);  //only relevant after merge
351
        success &= isNull("DerivedFromGuid2", destRS, id);  //only relevant after merge
352
        return success;
353
    }
354

    
355
    private Integer destParentIdInSource(ResultSet destRS) throws SQLException {
356
        String parentIdInSource = destRS.getString("pIdInSource");
357
        if (parentIdInSource == null){
358
            return null;
359
        }else{
360
            String idStr = parentIdInSource.replace("tu_id:", "").trim();
361
            Integer result = Integer.valueOf(idStr);
362
            return result;
363
        }
364
    }
365

    
366
    private Integer srcParentTaxonFk(ResultSet srcRS) throws SQLException {
367
        Integer id = nullSafeInt(srcRS,"id");
368
        Integer accId = nullSafeInt(srcRS, "tu_accfinal");
369
        Integer pId = nullSafeInt(srcRS, "pId");
370
        Integer pAccId = nullSafeInt(srcRS, "pAccId");
371
        Integer accPId = nullSafeInt(srcRS, "accPId");  //parent of accepted taxon
372

    
373
        if (accId != null && !id.equals(accId)){
374
            if (id.equals(accPId)){
375
                return pId;    //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
376
            }else{
377
                return null;  //taxon is not accepted
378
            }
379
        }else{
380
            if (id == 1){
381
                return null;  //Biota
382
            }else if (pAccId == null){
383
                return pId;  //handle parent preliminary as accepted
384
            }else if (id.equals(pAccId)){
385
                return pId;  //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
386
            }else{
387
                return pAccId;
388
            }
389
        }
390
    }
391

    
392
    private Integer normalizeTaxonStatusFk(ResultSet srcRS) throws SQLException {
393
        int id = srcRS.getInt("id");
394
        Integer accFinal = nullSafeInt(srcRS, "tu_accfinal");
395
        boolean accFinalDiffers = accFinal != null && !accFinal.equals(id);
396

    
397
        if (accFinalDiffers){
398
            return PesiTransformer.T_STATUS_SYNONYM;  //2
399
        }else{
400
            Integer status = nullSafeInt(srcRS, "tu_status");
401
            if(status == 1){   //accepted
402
                return PesiTransformer.T_STATUS_ACCEPTED;
403
            }else if (status == 6 || status == 8 || status == 10){  //nomen dubium, taxon inquirendum (status uncertain), uncertain
404
                return PesiTransformer.T_STATUS_UNRESOLVED;
405
            }else if (status == 2 || status == 3 || status == 5 || status == 7 || status == 9){  //unaccepted, nomen nudum, alternate representation, temporary name, interim unpublished
406
                return PesiTransformer.T_STATUS_UNACCEPTED;
407
            }
408
        }
409
        //4 does not exist and should not happen
410
        return -1;
411
    }
412

    
413
    private String normalizeTaxonStatusCache(ResultSet srcRS) throws SQLException {
414
        Integer status = normalizeTaxonStatusFk(srcRS);
415
        if (status == 1){
416
            return "accepted";
417
        }else if (status == 2){
418
            return "synonym";
419
        }else if (status == 4){
420
            return "pro parte synonym";
421
        }else if (status == 5){
422
            return "unresolved";
423
        }else if (status == 7){
424
            return "unaccepted";
425
        }else{
426
            return "xxx - not yet handled";
427
        }
428
    }
429

    
430
    private boolean testTreeIndex(ResultSet destRS, String childIndexAttr, String parentIndexAttr, String id) throws SQLException {
431
        boolean result;
432
        int taxonStatusFk = destRS.getInt("TaxonStatusFk");
433
        String parentTaxonId = destRS.getString("parentTaxonFk");
434
        int rankFk = destRS.getInt("RankFk");
435
        if (taxonStatusFk == 2 || taxonStatusFk == 4 || rankFk <= 10){  //synonym; pro parte syn; kingdom and higher
436
            result = isNull(childIndexAttr, destRS, id);
437
        }else{
438
            String childIndex = destRS.getString(childIndexAttr);
439
            String parentIndex = destRS.getString(parentIndexAttr);
440
            parentIndex = parentIndex == null? "#": parentIndex;
441
            result = equals("Tree index", childIndex, parentIndex + parentTaxonId + "#", id);
442
        }
443
        return result;
444
    }
445

    
446
    boolean namePartsFirst = true;
447
    private boolean compareNameParts(ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
448
        if (namePartsFirst){
449
            logger.warn("Validation of name parts not fully implemented (difficult). Currently validated via fullname");
450
            namePartsFirst = false;
451
        }
452
        int rankFk = srcRS.getInt("tu_rank");
453
        String genusOrUninomial = null;
454
        String infraGenericEpithet = null;
455
        String specificEpithet = null;
456
        String infraSpecificEpithet = null;
457
        if (rankFk <= 180){
458
            genusOrUninomial = srcRS.getString("tu_name");
459
        }else if (rankFk == 190){
460
            genusOrUninomial = srcRS.getString("pt_name");
461
            infraGenericEpithet =  srcRS.getString("tu_name");
462
            //TODO compareNameParts does not work this way
463
//        }else if (rankFk == 220){
464
//            genusOrUninomial = destRS.getString("p_GenusOrUninomial");
465
//            infraGenericEpithet = destRS.getString("p_InfraGenericEpithet");
466
//            specificEpithet = srcRS.getString("tu_name");
467
        }else{
468
            //TODO exception (compare name parts)
469
            return false;
470
        }
471
        boolean result = testEpis(destRS, genusOrUninomial, infraGenericEpithet,
472
                specificEpithet, infraSpecificEpithet, id);
473
        return result;
474
    }
475

    
476
    private boolean testEpis(ResultSet destRS, String genusOrUninomial, String infraGenericEpithet, String specificEpithet,
477
            String infraSpecificEpithet, String id) throws SQLException {
478
        boolean result = equals("Taxon genusOrUninomial", genusOrUninomial, destRS.getString("GenusOrUninomial"), id) ;
479
        result &= equals("Taxon infraGenericEpithet", infraGenericEpithet, destRS.getString("InfraGenericEpithet"), id) ;
480
        result &= equals("Taxon specificEpithet", specificEpithet, destRS.getString("SpecificEpithet"), id) ;
481
        result &= equals("Taxon infraSpecificEpithet", infraSpecificEpithet, destRS.getString("InfraSpecificEpithet"), id) ;
482
        return result;
483
    }
484

    
485
    private String normalizeRank(String string, ResultSet srcRS, String id) throws SQLException {
486
        String result = string
487
                .replace("Subforma", "Subform")
488
                .replace("Forma", "Form");
489
        int kingdomFk = Integer.valueOf(getSourceKingdomFk(srcRS, id));
490
        if (kingdomFk == 3 || kingdomFk == 4){
491
            result = result.replace("Subphylum", "Subdivision");
492
            result = result.replace("Phylum", "Division");
493
        }
494
        return result;
495
    }
496

    
497
    //see also ErmsTaxonImport.getExpectedTitleCache()
498
    private String srcFullName(ResultSet srcRs) throws SQLException {
499
        String result = null;
500
        String epi = srcRs.getString("tu_name");
501
        String display = srcRs.getString("tu_displayname");
502
        String sp = srcRs.getString("tu_sp");
503
        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //autonym, !animal
504
            String authority = srcRs.getString("tu_authority");
505
            result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", epi, authority)+" ");
506
        }else{
507
            result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));
508
        }
509
        return result;
510
    }
511

    
512
    private String srcDisplayName(ResultSet srcRs) throws SQLException {
513
        String result = null;
514
        String epi = srcRs.getString("tu_name");
515
        epi = " a" + epi;
516
        String display = "<i>"+srcRs.getString("tu_displayname")+"</i>";
517
        display = display.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>");
518
        String sp = srcRs.getString("tu_sp");
519
        if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
520
            result = display.replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
521
        }else{
522
            result = CdmUtils.concat(" ", display, srcRs.getString("tu_authority"));
523
        }
524
        return result;
525
    }
526

    
527
    String lastLastActionId = "-1";
528
    private boolean testLastAction(ResultSet srcRsLastAction, ResultSet destRs, String id, String table) throws SQLException {
529
        try {
530
            boolean success = true;
531
            String srcId = null;
532
            while (srcRsLastAction.next()){
533
                srcId = String.valueOf(srcRsLastAction.getInt("id"));
534
                if (!lastLastActionId.equals(srcId)){
535
                    lastLastActionId = srcId;
536
                    break;
537
                }
538
            }
539
            if(!id.equals(srcId)){
540
                logger.warn("Last Action SourceIDs are not equal: id: " +id + ", la-id: " + srcId);
541
            }
542
            String destStr = destRs.getString("LastAction");
543
            success &= equals(table + " SpeciesExpertName", srcRsLastAction.getString("ExpertName"), destRs.getString("SpeciesExpertName"), id);  //mapping ExpertName => SpeciesExpertName according to SQL script
544
            success &= equals(table + " Last Action", srcRsLastAction.getString("action_name"), destStr == null? null : destStr, id);
545
            success &= equals(table + " Last Action Date", srcRsLastAction.getTimestamp("sessiondate"), destRs.getTimestamp("LastActionDate"), id);
546

    
547
            return success;
548
        } catch (Exception e) {
549
            e.printStackTrace();
550
            throw e;
551
        }
552
    }
553

    
554
    private boolean compareKingdom(String messageStart, ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
555
        String srcKingdom = getSourceKingdomFk(srcRS, id);
556
        Integer intDest = nullSafeInt(destRS, "KingdomFk");
557
        if (intDest == null){
558
            logger.warn(id +": " + messageStart + " must never be null for destination. Biota needs to be 0, all the rest needs to have >0 int value.");
559
            return false;
560
        }else{
561
            return equals(messageStart, srcKingdom, String.valueOf(intDest), id);
562
        }
563
    }
564

    
565
    private String getSourceKingdomFk(ResultSet srcRS, String id) throws SQLException {
566
        String strSrc = srcRS.getString("acc_sp");
567
        if (strSrc == null){
568
            strSrc = srcRS.getString("tu_sp");
569
        }
570
        if (strSrc == null){
571
            if ("1".equals(id)){
572
                strSrc = "0";  //Biota
573
            }else if ("147415".equals(id)){
574
                strSrc = "6";  //Monera is synonym of Bacteria
575
            }else{
576
                strSrc = id;
577
            }
578
        }else{
579
            strSrc = strSrc.substring(1);
580
            strSrc = strSrc.substring(0, strSrc.indexOf("#"));
581
        }
582
        return strSrc;
583
    }
584

    
585
    private boolean testSingleTaxonRelations(int n) throws SQLException {
586
        boolean success = true;
587
        ResultSet srcRS = source.getResultSet(""
588
                + " SELECT t.* "
589
                + " FROM tu t "
590
                + " WHERE t.id "+ moneraFilter + " AND tu_accfinal <> id "
591
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
592
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
593
                + " FROM RelTaxon rel "
594
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
595
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
596
                + " WHERE t1."+origErms+" AND t2." + origErms
597
                + " ORDER BY t1.IdInSource");
598
        int i = 0;
599
        while (srcRS.next() && destRS.next()){
600
            success &= testSingleTaxonRelation(srcRS, destRS);
601
            i++;
602
        }
603
        success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
604
        return success;
605
    }
606

    
607
    private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
608
        String id = String.valueOf(srcRS.getInt("id"));
609
        boolean success = equals("Taxon relation taxon1", "tu_id: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
610
        success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
611
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
612
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
613
        success &= isNull("notes", destRS, id);
614
        //complete if no further relations need to be added
615
        return success;
616
    }
617

    
618
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
619
        boolean success = true;
620
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
621
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
622
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
623
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
624
                + " WHERE MN.tu_id  " + moneraFilter
625
                + " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
626
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
627
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
628
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
629
                + "    LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
630
                + " WHERE t."+origErms
631
                + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
632
        int count = 0;
633
        while (srcRs.next() && destRs.next()){
634
            success &= testSingleAdditionalTaxonSource(srcRs, destRs);
635
            count++;
636
        }
637
        success &= equals("Notes count differs", n, count, "-1");
638
        return success;
639
    }
640

    
641
    private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
642
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
643
        boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
644
        success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id);  //currently we use the same id in ERMS and PESI
645
        success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
646
        success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
647
        //TODO some records are still truncated ~ >820 characters
648
        success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
649
        success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
650
        //Complete
651
        return success;
652
    }
653

    
654
    private boolean testSingleNotes(int n) throws SQLException {
655
        boolean success = true;
656
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
657
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
658
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
659
                + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
660
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
661
                + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
662
                + "    LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
663
                + "    LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
664
                + " WHERE t." + origErms
665
                + "      AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
666
                + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1  ");
667
        int count = 0;
668
        ResultSet srcRsLastAction = source.getResultSet(""
669
                + " SELECT no.id, s.sessiondate, a.action_name, s.ExpertName "
670
                + " FROM notes no "
671
                + "   INNER JOIN tu ON tu.id = no.tu_id "
672
                + "   LEFT JOIN languages l ON l.LanID = no.lan_id"
673
                + "   LEFT JOIN notes_sessions MN ON no.id = MN.note_id "
674
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
675
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
676
                + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
677

    
678
        while (srcRs.next() && destRs.next()){
679
            success &= testSingleNote(srcRs, destRs);
680
            success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Note");
681
            count++;
682
        }
683
        success &= equals("Notes count differs", n, count, "-1");
684
        return success;
685
    }
686

    
687
    private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
688
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
689
        boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
690
        success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
691
        success &= isNull("Note_2", destRs, id);
692
        success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
693
        success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
694
        success &= isNull("Region", destRs, id);
695
        success &= isNull("SpeciesExpertGUID", destRs, id);
696
        //SpeciesExpertName, LastAction, LastActionDate handled in separate method
697
        //complete
698
        return success;
699
    }
700

    
701
    private String normalizeNoteCatCache(String string) {
702
        return StringUtils.capitalize(string)
703
                .replace("Original Combination", "Original combination")
704
                .replace("Taxonomic remark", "Taxonomic Remark");
705
    }
706

    
707
    private boolean testSingleDistributions(int n) throws SQLException {
708
        boolean success = true;
709
        ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId,"
710
                + " gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
711
                + " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
712
                + "    LEFT JOIN gu ON gu.id = dr.gu_id "
713
                + " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name, dr.noteSortable ");  //, dr.note (not possible because ntext
714
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
715
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
716
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
717
                + " WHERE t." + origErms
718
                + " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
719
        ResultSet srcRsLastAction = source.getResultSet(""
720
                + " SELECT dr.id, s.sessiondate, a.action_name, s.ExpertName "
721
                + " FROM dr "
722
                + "   INNER JOIN tu ON tu.id = dr.tu_id "
723
                + "   LEFT JOIN gu ON gu.id = dr.gu_id "
724
                + "   LEFT JOIN dr_sessions MN ON dr.id = MN.dr_id "
725
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
726
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
727
                + " ORDER BY CAST(tu.id as nvarchar(20)), gu.gazetteer_id, gu.gu_name, s.sessiondate DESC, a.id DESC ");
728
        int count = 0;
729
        while (srcRs.next() && destRs.next()){
730
            success &= testSingleDistribution(srcRs, destRs);
731
            //there are >1000 duplicates in dr, therefore this creates lots of warnings
732
            success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Distribution");
733
            count++;
734
        }
735
        success &= equals("Distribution count differs", n, count, "-1");
736
        return success;
737
    }
738

    
739
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
740
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name"));
741
        boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
742
        success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id);
743
        success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id);
744
        success &= equals("Distribution area name cache", srcRs.getString("gu_name"), destRs.getString("AreaNameCache"), id);
745
        success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id);
746
        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
747
        //TODO see comments
748
        success &= isNull("SourceFk", destRs, id);  //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
749
        success &= isNull("SourceCache", destRs, id);  //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
750
        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
751
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in ERMS
752
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
753
        return success;
754
    }
755

    
756
    private boolean testSingleCommonNames(int n) throws SQLException {
757
        boolean success = true;
758
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
759
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
760
                + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
761
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
762
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
763
                + " WHERE t." + origErms
764
                + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate ");  //sorting also lastActionDate results in a minimum of exact duplicate problems
765
        ResultSet srcRsLastAction = source.getResultSet(""
766
                + " SELECT v.id, s.sessiondate, a.action_name, s.ExpertName "
767
                + " FROM vernaculars v "
768
                + "   INNER JOIN tu ON tu.id = v.tu_id "
769
                + "   LEFT JOIN languages l ON l.LanID = v.lan_id"
770
                + "   LEFT JOIN vernaculars_sessions MN ON v.id = MN.vernacular_id "
771
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
772
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
773
                + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
774
        int count = 0;
775
        while (srcRs.next() && destRs.next()){
776
            success &= testSingleCommonName(srcRs, destRs);
777
            success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "CommonName");
778
            count++;
779
        }
780
        success &= equals("Common name count differs", n, count, "-1");
781
        return success;
782
    }
783

    
784
    boolean prefer639_3 = true;
785
    String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
786
    String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
787

    
788
    private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
789
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
790
        boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
791
        success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
792
        success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
793
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
794
        //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
795
        success &= isNull("Region", destRs, id);  //region does not seem to exist in ERMS
796
        //TODO see comments
797
//        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table, check with PESI 2014
798
//        success &= isNull("SourceNameCache", destRs);  //sources should be moved to extra table, check with PESI 2014
799
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in ERMS
800
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
801
        //complete
802
        return success;
803
    }
804

    
805
    private String normalizeLang(String string) {
806
        if ("Spanish".equals(string)){
807
            return "Spanish, Castillian";
808
        }else if ("Modern Greek (1453-)".equals(string)){
809
            return "Greek";
810
        }else if ("Malay (individual language)".equals(string)){
811
            return "Malay";
812
        }else if ("Swahili (individual language)".equals(string)){
813
            return "Swahili";
814
        }
815

    
816
        return string;
817
    }
818

    
819
    private String getLanguageIso(ResultSet destRs) throws SQLException {
820
        String result = destRs.getString(preferredISO639);
821
        if (result == null){
822
            result = destRs.getString(alternativeISO639);
823
        }
824
        return result;
825
    }
826

    
827
    private boolean testSingleReferences() throws SQLException {
828
        boolean success = true;
829
        ResultSet srcRS = source.getResultSet("SELECT s.* FROM sources s ORDER BY s.id ");
830
        ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
831
                + " WHERE s." + origErms
832
                + " ORDER BY s.RefIdInSource ");  // +1 for the source reference "erms" but this has no OriginalDB
833
        while (srcRS.next() && destRS.next()){
834
            success &= testSingleReference(srcRS, destRS);
835
        }
836
        return success;
837
    }
838

    
839
    private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
840
        String id = String.valueOf(srcRS.getInt("id"));
841
        boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
842
        success &= equals("Reference IMIS_id ", srcRS.getString("imis_id"), destRS.getString("IMIS_Id"), id);
843
        success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("source_type")), destRS.getInt("SourceCategoryFk"), id);
844
        success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("source_type")), destRS.getString("SourceCategoryCache"), id);
845
        success &= equals("Reference name ", srcRS.getString("source_name"), destRS.getString("Name"), id);
846
        success &= equals("Reference abstract ", srcRS.getString("source_abstract"), destRS.getString("Abstract"), id);
847
        success &= equals("Reference title ", srcRS.getString("source_title"), destRS.getString("Title"), id);
848
        success &= equals("Reference author string ", srcRS.getString("source_author"), destRS.getString("AuthorString"), id);
849
        success &= equals("Reference year ", normalizeYear(srcRS.getString("source_year")), destRS.getString("RefYear"), id);
850
        success &= isNull("NomRefCache", destRS, id);  //for ERMS no other value was found in 2014 value
851
        success &= equals("Reference link ", srcRS.getString("source_link"), destRS.getString("Link"), id);
852
        success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
853
        //complete
854
        return success;
855
    }
856

    
857
    private Integer convertSourceTypeFk(String sourceType) {
858
        if (sourceType == null){
859
            return null;
860
        }else if ("d".equals(sourceType)){
861
            return 4;
862
        }else if ("e".equals(sourceType)){
863
            return 5;
864
        }else if ("p".equals(sourceType)){
865
            return 11;
866
        }
867
        return null;
868
    }
869
    private String convertSourceTypeCache(String sourceType) {
870
        if (sourceType == null){
871
            return null;
872
        }else if ("d".equals(sourceType)){
873
            return "database";
874
        }else if ("e".equals(sourceType)){
875
            return "informal reference";
876
        }else if ("p".equals(sourceType)){
877
            return "publication";
878
        }
879
        return null;
880
    }
881

    
882
    private boolean testReferenceCount() {
883
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM sources ");
884
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origErms);  // +1 for the source reference "erms" but this has no OriginalDB
885
        boolean success = equals("Reference count ", countSrc, countDest, "-1");
886
        return success;
887
    }
888

    
889
    //NOTE: there could be better parsing of source_year during import, this may also need better normalizing in the database
890
    private String normalizeYear(String yearStr) {
891
        if (StringUtils.isBlank(yearStr)){
892
            return yearStr;
893
        }
894
        yearStr = yearStr.trim();
895
        if (yearStr.matches("\\d{4}-\\d{2}")){
896
            yearStr = yearStr.substring(0, 5)+yearStr.substring(0, 2)+yearStr.substring(5);
897
        }
898
        if (yearStr.equals("20 Mar 1891")){
899
            yearStr = "20.3.1891";
900
        }
901
        if (yearStr.equals("July 1900")){
902
            yearStr = "7.1900";
903
        }
904
        return yearStr;
905
    }
906

    
907
    private boolean isNull(String attrName, ResultSet destRS, String id) throws SQLException {
908
        Object value = destRS.getObject(attrName);
909
        if (value != null){
910
            String message = attrName + " was expected to be null but was: " + value.toString() + "; id = " + id;
911
            logger.warn(message);
912
            return false;
913
        }else{
914
            logger.info(attrName + " was null as expected; id = " + id);
915
            return true;
916
        }
917
    }
918

    
919
    private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
920
        if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
921
            String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
922
            logger.warn(message);
923
            return false;
924
        }else{
925
            logger.info(messageStart + " were equal: " + srcDate);
926
            return true;
927
        }
928
    }
929

    
930
    private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
931
        String strId = id.equals("-1")? "": (id+ ": ");
932
        if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
933
            String message = strId+ messageStart + " must be equal, but was not.\n Source: "+  nSrc + "; Destination: " + nDest;
934
            logger.warn(message);
935
            return false;
936
        }else{
937
            logger.info(strId + messageStart + " were equal: " + nSrc);
938
            return true;
939
        }
940
    }
941

    
942
    private boolean equals(String messageStart, String strSrc, String strDest, String id) {
943
        if (StringUtils.isBlank(strSrc)){
944
            strSrc = null;
945
        }else{
946
            strSrc = strSrc.trim();
947
        }
948
        //we do not trim strDest here because this should be done during import already. If not it should be shown here
949
        if (!CdmUtils.nullSafeEqual(strSrc, strDest)){
950
            int index = CdmUtils.diffIndex(strSrc, strDest);
951
            String message = id+ ": " + messageStart + " must be equal, but was not at "+index+".\n  Source:      "+  strSrc + "\n  Destination: " + strDest;
952
            logger.warn(message);
953
            return false;
954
        }else{
955
            logger.info(id+ ": " + messageStart + " were equal: " + strSrc);
956
            return true;
957
        }
958
    }
959

    
960
    protected Integer nullSafeInt(ResultSet rs, String columnName) throws SQLException {
961
        Object intObject = rs.getObject(columnName);
962
        if (intObject == null){
963
            return null;
964
        }else{
965
            return Integer.valueOf(intObject.toString());
966
        }
967
    }
968

    
969
//** ************* MAIN ********************************************/
970

    
971

    
972

    
973
    public static void main(String[] args){
974
        PesiErmsValidator validator = new PesiErmsValidator();
975
        validator.invoke(defaultSource, defaultDestination);
976
        System.exit(0);
977
    }
978
}
(1-1/2)