Project

General

Profile

Download (55.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.pesi.validate;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.sql.Timestamp;
14
import java.time.LocalDate;
15
import java.util.UUID;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.app.common.CdmDestinations;
21
import eu.etaxonomy.cdm.app.common.PesiDestinations;
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.common.UTF8;
24
import eu.etaxonomy.cdm.database.ICdmDataSource;
25
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
26
import eu.etaxonomy.cdm.io.common.Source;
27
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
28
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
29
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
30

    
31
/**
32
 * Tests the E+M -> PESI pipeline by comparing the source DB with destination PESI DB.
33
 *
34
 * @author a.mueller
35
 * @since 08.10.2019
36
 */
37
public class PesiEuroMedValidator {
38

    
39
    private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
40

    
41
    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
42
//    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_pesi2019_final();
43
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
44
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
45

    
46
    boolean doReferences = false;
47
    boolean doTaxa = true;
48
    boolean doTaxRels = false;
49
    boolean doDistributions = false;
50
    boolean doCommonNames = false;
51
    boolean doNotes = false;
52
    boolean doAdditionalTaxonSources = false;
53

    
54
    private Source source = new Source(defaultSource);
55
    private Source destination = defaultDestination;
56

    
57
    private String origEuroMed = "OriginalDB = 'E+M' ";
58

    
59
    public void invoke(Source source, Source destination){
60
        logger.warn("Validate destination " +  destination.getDatabase());
61
        boolean success = true;
62
        try {
63
            this.source = source;
64
            this.destination = destination;
65
            success &= testReferences();
66
            success &= testTaxa();
67
            success &= testTaxonRelations();
68
            success &= testDistributions();
69
            success &= testCommonNames();
70
            success &= testNotes();
71
            success &= testAdditionalTaxonSources();
72
        } catch (Exception e) {
73
            e.printStackTrace();
74
            success = false;
75
        }
76
        //TBC
77
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
78
    }
79

    
80
    private boolean testAdditionalTaxonSources() throws SQLException {
81
        if (!doAdditionalTaxonSources){
82
            return true;
83
        }
84
        System.out.println("Start validate additional taxon sources");
85
        boolean success = testAdditionalTaxonSourcesCount();
86
        if (success){
87
              success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
88
        }
89
        return success;
90
    }
91

    
92
    private boolean testNotes() throws SQLException {
93
        if (!doNotes){
94
            return true;
95
        }
96
        System.out.println("Start validate notes");
97
        boolean success = testNotesCount();
98
        if (success){
99
              success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
100
        }
101
        return success;
102
    }
103

    
104
    private boolean testDistributions() throws SQLException {
105
        if (!doDistributions){
106
            return true;
107
        }
108
        System.out.println("Start validate distributions");
109
        boolean success = testDistributionCount();
110
        if (!success){
111
              success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
112
        }
113
        return success;
114
    }
115

    
116
    private boolean testCommonNames() throws SQLException {
117
        if (!doCommonNames){
118
            return true;
119
        }
120
        System.out.println("Start validate common names");
121
        boolean success = testCommonNameCount();
122
        if (success){
123
            success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
124
        }
125
        return success;
126
    }
127

    
128
    int countSynonyms;
129
    int countIncludedIns;
130
    private boolean testTaxonRelations() throws SQLException {
131
        if (!doTaxRels){
132
            return true;
133
        }
134
        System.out.println("Start validate taxon relations");
135
        boolean success = testSynonymRelations();
136
        success &= testIncludedInRelations();
137
        success &= testTotalRelations();
138
        success &= testNameRelations();
139
        return success;
140
    }
141

    
142
    private boolean testTotalRelations() {
143
        if (!(countSynonyms < 0 || countIncludedIns < 0)){
144
            int countTotalSrc = countSynonyms + countIncludedIns;
145
            int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
146
            boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
147
            int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origEuroMed);
148
            success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
149
            return success;
150
        }else{
151
            return false;
152
        }
153
    }
154

    
155
    private final String countSynonymRelation = "SELECT count(*) FROM TaxonBase syn LEFT JOIN TaxonBase acc ON syn.acceptedTaxon_id = acc.id WHERE syn.publish = 1 AND acc.publish = 1 ";
156
    private boolean testSynonymRelations() throws SQLException {
157

    
158
        int countSrc = source.getUniqueInteger(countSynonymRelation);
159
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
160
        boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
161
        if (success){
162
            //TODO test single synonym relations
163
            success &= testSingleSynonymRelations(source.getUniqueInteger(countSynonymRelation));
164
        }
165
        countSynonyms = (countSrc == countDest)? countSrc : -1;
166
        return success;
167
    }
168

    
169
    private boolean testSingleSynonymRelations(int n) throws SQLException {
170
        boolean success = true;
171
        ResultSet srcRS = source.getResultSet(""
172
                + " SELECT t.id tid, pt.id pid "
173
                + " FROM TaxonNode tn "
174
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
175
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
176
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
177
                + " WHERE t.publish = 1 && pt.publish = 1 "
178
                + " ORDER BY CAST(tb.id as char(20)) ");
179

    
180
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
181
                + " FROM RelTaxon rel "
182
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
183
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
184
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk > 101 "
185
                + " ORDER BY t1.IdInSource");
186
        int i = 0;
187
        while (srcRS.next() && destRS.next()){
188
            success &= testSingleSynonymRelation(srcRS, destRS);
189
            i++;
190
        }
191
        success &= equals("Synonym relation count for single compare", n, i, String.valueOf(-1));
192
        return success;
193
    }
194

    
195
    private boolean testSingleSynonymRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
196
        String id = String.valueOf(srcRS.getInt("id"));
197
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
198
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
199
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
200
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
201
        //TODO enable after next import
202
//        success &= isNull("notes", destRS);
203
        //complete if no further relations need to added
204
        return success;
205
    }
206

    
207
    private boolean testNameRelations() {
208
        //Name relations
209
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM NameRelationship WHERE ("
210
               + " 1=1 "
211
                 + ")");
212
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
213
        boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
214
        if (success){
215
            //TODO test single name relation
216
//            success &= testSingleNameRelations(source.getUniqueInteger(countSynonymRelation));
217
        }
218
        return success;
219
    }
220

    
221
    private final String countParentRelation  = "SELECT count(*) "
222
            + " FROM TaxonNode tn "
223
            + " INNER JOIN TaxonBase tb ON tn.taxon_id = tb.id "
224
            + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
225
            + "   LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
226
            + " WHERE tb.publish = 1 && pt.publish = 1  ";
227

    
228
    private boolean testIncludedInRelations() throws SQLException {
229
        int countSrc = source.getUniqueInteger(countParentRelation);
230
        int  countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
231
        boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
232
        if (success){
233
            success &= testSingleTaxonRelations(source.getUniqueInteger(countParentRelation));
234
        }
235
        countIncludedIns = (countSrc == countDest)? countSrc : -1;
236
        return success;
237
    }
238

    
239
    private boolean testTaxa() throws SQLException {
240
        if (!doTaxa){
241
            return true;
242
        }
243
        System.out.println("Start validate taxa");
244
        boolean success = testTaxaCount();
245
        if (success){
246
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
247
        }
248
        return success;
249
    }
250

    
251
    String countReferencesStr = "SELECT count(*) FROM reference ";
252
    private boolean testReferences() throws SQLException {
253
        if (!doReferences){
254
            return true;
255
        }
256
        System.out.println("Start validate references");
257
        boolean success = testReferenceCount();
258
        if (success){
259
            success &= testSingleReferences(source.getUniqueInteger(countReferencesStr));
260
        }
261
        return success;
262
    }
263

    
264
    private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts ";
265
    private boolean testAdditionalTaxonSourcesCount() {
266
        int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
267
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
268
        return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
269
    }
270

    
271
    private boolean testNotesCount() {
272
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
273
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
274
                + " WHERE (1=1) ");
275
        boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
276

    
277
        return result;
278
    }
279

    
280
    private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
281
            + "'111bdf38-7a32-440a-9808-8af1c9e54b51',"   //E+M
282
            //Former UUSR
283
            + "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
284
            + "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
285
            + "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
286
    private String distributionCountSQL = "SELECT count(*) as n "
287
            + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
288
            + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
289
            + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
290
            + distributionCountWhere;
291
    private boolean testDistributionCount() {
292
        int countSrc = source.getUniqueInteger(distributionCountSQL);
293
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
294
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
295
    }
296

    
297
    private boolean testCommonNameCount() {
298
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
299
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
300
        return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
301
    }
302

    
303
    private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
304
    private final String destTaxonFilter = "(t.SourceFk IS NOT NULL OR t.AuthorString like 'auct.%' OR t.AuthorString like 'sensu %')";
305
    private boolean testTaxaCount() {
306
         int countSrc = source.getUniqueInteger(countTaxon);
307
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE "+ destTaxonFilter);
308
         boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
309
         return result;
310
     }
311

    
312
    private boolean testSingleTaxa(int n) throws SQLException {
313
        boolean success = true;
314
        ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, pt.id parentId, "
315
                + "      tn.rank_id, rank.titleCache rank_name, "
316
                + "      sec.titleCache secTitle, "
317
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
318
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, tn.fullTitleCache nameFullTitleCache, "
319
                + "      tb.DTYPE taxStatus, tb.titleCache, tb.appendedPhrase tbAppendedPhrase, tb.sec_id secId, "
320
                + "      taxRelType.uuid taxRelTypeUuid, tr.relatedTo_id relToTaxonId, "
321
                + "      nsType.id nsId, nsType.idInVocabulary nsTitle, "
322
                + "      typeName_id, typeName.titleCache typeFullNameCache, "
323
                + "      CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
324
                + "      CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
325
                + " FROM TaxonBase tb "
326
                + "     LEFT JOIN TaxonName tn on tb.name_id = tn.id "
327
                + "     LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
328
                + "     LEFT JOIN Reference sec ON sec.id = tb.sec_id "
329
                + "     LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
330
                + "     LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
331
                + "     LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
332
                + "     LEFT JOIN TaxonName_TypeDesignationBase typeMN ON typeMN.TaxonName_id = tn.id "
333
                + "     LEFT JOIN TypeDesignationBase td ON td.id = typeMN.typedesignations_id "
334
                + "     LEFT JOIN TaxonName typeName ON typeName.id = td.typeName_id "
335
                + "     LEFT JOIN TaxonNode n ON n.taxon_id = tb.id "
336
                + "     LEFT JOIN TaxonNode ptn ON n.parent_id = ptn.id "
337
                + "     LEFT JOIN TaxonBase pt ON pt.id = ptn.taxon_id AND pt.publish = 1 "
338
                + "     LEFT JOIN TaxonRelationship tr ON tr.relatedFrom_id = tb.id "
339
                + "     LEFT JOIN TaxonBase tbRelTo ON tr.relatedTo_id = tbRelTo.id "
340
                + "     LEFT JOIN DefinedTermBase taxRelType ON taxRelType.id = tr.type_id"
341
                + " WHERE tb.publish = 1 "
342
                + " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
343
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
344
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache, "
345
                + "      tb.DTYPE, tb.updated, tb.created "    //for duplicates caused by >1 name status
346
                + " ORDER BY tid, GUID, lastActionDate ");
347
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
348
                + "     pt.IdInSource parentSourceId, "  //not needed
349
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
350
                + " FROM Taxon t "
351
                + "    LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
352
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
353
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
354
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
355
                + " WHERE t."+ origEuroMed + " AND " + destTaxonFilter   //FIXME remove SourceFk filter is only preliminary for first check
356
                + " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
357
        int i = 0;
358
        logger.error("remove SourceFk filter is only preliminary for first check");
359
        while (srcRS.next() && destRS.next()){
360
            success &= testSingleTaxon(srcRS, destRS);
361
            i++;
362
        }
363
        success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
364
        return success;
365
    }
366

    
367
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
368
        String id = String.valueOf(srcRS.getInt("tid"));
369
        //TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
370
        boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
371
        success &= equals("Taxon source", srcRS.getString("secTitle"), destRS.getString("sourceName"), id);
372

    
373
        success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
374
//difficult to test        success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
375
        success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
376
        success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
377
        success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
378
        success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
379
        success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
380

    
381
        success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
382
//TODO     success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
383
        success &= equals("Taxon authority", makeAuthorship(srcRS), destRS.getString("AuthorString"), id);
384
        success &= equals("Taxon FullName", makeFullName(srcRS), destRS.getString("FullName"), id);
385
        success &= equals("Taxon NomRefString", makeNomRefString(srcRS), destRS.getString("NomRefString"), id);
386
//      success &= equals("Taxon DisplayName", makeDisplayName(srcRS), destRS.getString("DisplayName"), id);  //in ERMS according to SQL script same as FullName, no nom.ref. information attached
387
//difficult to test   success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
388
        success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
389

    
390
        //TODO mostly Taxonomically Valueless
391
//        success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), nullSafeInt( destRS,"TaxonStatusFk"), id);
392
//        success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), destRS.getString("TaxonStatusCache"), id);
393

    
394
        success &= equals("Taxon ParentTaxonFk", nullSafeInt(srcRS, "parentId"), nullSafeInt(destRS, "ParentTaxonFk"), id);
395

    
396
        Integer origTypeNameFk = nullSafeInt(srcRS, "typeName_id");
397
        success &= equals("Taxon TypeNameFk", origTypeNameFk == null? null : "NameId: " + origTypeNameFk, destRS.getString("typeSourceId"), id);
398
        success &= equals("Taxon TypeFullNameCache", srcRS.getString("typeFullNameCache"), destRS.getString("TypeFullNameCache"), id);
399
        //according to SQL always constant, could be changed in future
400
        success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
401
        success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
402
//TODO TreeIndex
403
        success &= isNull("FossilStatusFk", destRS);
404
        success &= isNull("FossilStatusCache", destRS);
405
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
406
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
407
        success &= isNull("ExpertGUID", destRS);  //according to SQL + PESI2014
408
//FIXME        success &= equals("Taxon ExpertName", srcRS.getString("secTitle"), destRS.getString("ExpertName"), id);
409
//FIXME        success &= isNull("SpeciesExpertGUID", destRS);
410
        success &= equals("Taxon SpeciesExpertName", srcRS.getString("secTitle"), destRS.getString("SpeciesExpertName"), id);
411
//FIXME !!        success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
412
        success &= equals("Taxon Last Action", srcRS.getString("lastAction"),  destRS.getString("LastAction"), id);
413
        success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"),  destRS.getTimestamp("LastActionDate"), id);
414

    
415
        success &= isNull("GUID2", destRS);  //only relevant after merge
416
        success &= isNull("DerivedFromGuid2", destRS);  //only relevant after merge
417
        return success;
418
    }
419

    
420
    private String makeAuthorship(ResultSet srcRs) throws SQLException {
421
        boolean isMisapplied = isMisapplied(srcRs);
422
        if (isMisapplied){
423
            String result = getMisappliedAuthor(srcRs).trim();
424
            return result;
425
        }else{
426
            return srcRs.getString("authorshipCache");
427
        }
428
    }
429

    
430
    private String makeFullName(ResultSet srcRs) throws SQLException {
431
        boolean isMisapplied = isMisapplied(srcRs);
432
        if (isMisapplied){
433
            String result = srcRs.getString("nameCache");
434
            result += getMisappliedAuthor(srcRs);
435
            return result;
436
        }else{
437
            return srcRs.getString("nameTitleCache");
438
        }
439
    }
440

    
441
    private String makeNomRefString(ResultSet srcRS) throws SQLException {
442
        //there is no pure nomRefString field in CDM and also computing is only possible
443
        //with cache strategy which requires a running CDM instance. So this is a workaround
444
        //that maybe needs to be adapted
445
        String result = null;
446
        String fullTitle = srcRS.getString("nameFullTitleCache");
447
        String nameTitleCache = srcRS.getString("nameTitleCache");
448
        String nameStatus = CdmUtils.Nz(srcRS.getString("nsTitle"));
449
        if (fullTitle != null && nameTitleCache != null){
450
            result = fullTitle.substring(nameTitleCache.length())
451
                    .replaceAll("^, ", "")
452
                    .replaceAll("(, |^)"+nameStatus+"$", "")
453
                    .replaceAll("\\[as \".*\"\\]", "")
454
                    .trim();
455
        }
456
        return result;
457
    }
458

    
459
    private String mapTaxStatus(String dtype, String taxRelTypeUuidStr) {
460
        Integer statusFk = mapTaxStatusFk(dtype, taxRelTypeUuidStr);
461
        if (statusFk == null){
462
            return null;
463
        }else if (statusFk == PesiTransformer.T_STATUS_ACCEPTED){
464
            return "accepted";
465
        }else if (statusFk == PesiTransformer.T_STATUS_SYNONYM){
466
            return "synonym";
467
        }else if (statusFk == PesiTransformer.T_STATUS_PRO_PARTE_SYN){
468
            return "pro parte synonym";
469
        }else if (statusFk == PesiTransformer.T_STATUS_PARTIAL_SYN){
470
            return "partial synonym";
471
        }
472
        return null;
473
    }
474

    
475
    private Integer mapTaxStatusFk(String dtype, String taxRelTypeUuidStr) {
476
        if (dtype == null){
477
            return null;
478
        }else if ("Synonym".equals(dtype)){
479
            return PesiTransformer.T_STATUS_SYNONYM;
480
        }else if ("Taxon".equals(dtype)){
481
            UUID relTypeUuid = taxRelTypeUuidStr == null? null: UUID.fromString(taxRelTypeUuidStr);
482
            if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
483
                return PesiTransformer.T_STATUS_PRO_PARTE_SYN;
484
            }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
485
                return PesiTransformer.T_STATUS_PARTIAL_SYN;
486
            }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
487
                return PesiTransformer.T_STATUS_SYNONYM;  //no explicit MAN status exists in PESI
488
            }else{
489
                return PesiTransformer.T_STATUS_ACCEPTED;
490
            }
491
        }
492
        return null;
493
    }
494

    
495
    private String normalizeRank(String rankStr) {
496
        if (rankStr == null){return null;
497
        }else if (rankStr.equals("Convar")){return "Convariety";
498
        }else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
499
        }else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
500
        }else if (rankStr.equals("Coll. species")){return "Coll. Species";
501
        }else if (rankStr.equals("Species Aggregate")){return "Aggregate";
502
        }else if (rankStr.equals("Subsection bot.")){return "Subsection";
503
        }return rankStr;
504
    }
505

    
506
    private String makeDisplayName(ResultSet srcRs) throws SQLException {
507
        boolean isMisapplied = isMisapplied(srcRs);
508

    
509
        String result;
510
        String nameTitle = srcRs.getString("nameTitleCache");
511
        String nameCache = srcRs.getString("nameCache");
512
        if(!isMisapplied){
513
            result = srcRs.getString("nameFullTitleCache");
514
            String taggedName = getTaggedNameTitle(nameCache, nameTitle);
515
            result = result.replace(nameTitle, taggedName);
516
            result = result.replaceAll("^<i>"+ UTF8.HYBRID , UTF8.HYBRID+ "<i>").replaceAll(" "+ UTF8.HYBRID, "</i> "+UTF8.HYBRID+"<i>");
517
        }else{
518
            result = srcRs.getString("nameCache");
519
            String taggedName = getTaggedNameTitle(nameCache, nameCache);
520
            result = result.replace(nameCache, taggedName);
521
            //misapplied
522
            result += getMisappliedAuthor(srcRs);
523
        }
524
        String nameStatus = CdmUtils.Nz(srcRs.getString("nsTitle"));
525
        result = result.replaceAll("(, |^)"+nameStatus+"$", "");
526
        return result;
527
    }
528

    
529
    private boolean isMisapplied(ResultSet srcRs) throws SQLException {
530
        String relTypeUuid = srcRs.getString("taxRelTypeUuid");
531
        boolean isMisapplied = relTypeUuid!=null
532
                && (relTypeUuid.equals(TaxonRelationshipType.uuidMisappliedNameFor.toString())
533
                   || relTypeUuid.equals(TaxonRelationshipType.uuidProParteMisappliedNameFor.toString())
534
                   || relTypeUuid.equals(TaxonRelationshipType.uuidPartialMisappliedNameFor.toString()))
535
                //TODO formatting of ppMANs not yet implemented
536
                && nullSafeInt(srcRs, "relToTaxonId") != null;
537
        return isMisapplied;
538
    }
539

    
540
    private String getMisappliedAuthor(ResultSet srcRs) throws SQLException {
541
        String result;
542
        String relAppendedPhrase = srcRs.getString("tbAppendedPhrase");
543
        String secId = srcRs.getString("secId");
544
        String secTitle = srcRs.getString("secTitle");
545
        if(relAppendedPhrase == null && secId == null) {
546
            result = " auct.";
547
        }else if (relAppendedPhrase != null && secId == null){
548
            result = " " + relAppendedPhrase;
549
        }else if (relAppendedPhrase == null && secId != null){
550
            result = " sensu " + secTitle;
551
        }else{
552
            result = " " + relAppendedPhrase + " " + secTitle;
553
        }
554
        String authorship = srcRs.getString("authorshipCache");
555
        if (isNotBlank(authorship)){
556
            result += ", non " + authorship;
557
        }
558
        return result;
559
    }
560

    
561
    private String getTaggedNameTitle(String nameCache, String nameTitle) {
562
        if (nameCache == null){
563
            logger.warn("NameCache is null");
564
            return nameTitle;
565
        }
566
        String result = null;
567
        try {
568
            String[] nameCacheSplit = nameCache.split(" ");
569
            String[] nameTitleSplit = nameTitle.split(" ");
570
            result = "";
571
            boolean currentIsName = false;
572
            for (int i=0, j=0; j < nameTitleSplit.length; j++){
573
                if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
574
                        && !isMarker(nameCacheSplit[i])){
575
                    if(!currentIsName){
576
                        result += " <i>" + nameCacheSplit[i];
577
                        currentIsName = true;
578
                    }else{
579
                        result += " " + nameCacheSplit[i];
580
                    }
581
                    if((j+1)==nameTitleSplit.length){
582
                        result += "</i>";
583
                    }
584
                    i++;
585
                }else{
586
                    if(currentIsName){
587
                        result += "</i>";
588
                        currentIsName = false;
589
                    }
590
                    result += " " + nameTitleSplit[j];
591
                    if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
592
                            && isMarker(nameCacheSplit[i])){
593
                        i++;
594
                    }
595
                }
596
            }
597
            return result.trim();
598
        } catch (Exception e) {
599
            e.printStackTrace();
600
            return result;
601
        }
602
    }
603

    
604
    private boolean isMarker(String nameCacheSplit) {
605
        return nameCacheSplit.endsWith(".") || nameCacheSplit.equals("[unranked]")
606
                || nameCacheSplit.equals("grex")|| nameCacheSplit.equals("proles")
607
                || nameCacheSplit.equals("race");
608
    }
609

    
610
    private boolean testSingleTaxonRelations(int n) throws SQLException {
611
        boolean success = true;
612
        ResultSet srcRS = source.getResultSet(""
613
                + " SELECT t.name_id tid, pt.name_id pid "
614
                + " FROM TaxonNode tn "
615
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
616
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
617
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
618
                + " WHERE t.publish = 1 && pt.publish = 1 "
619
                + " ORDER BY CAST(t.name_id as char(20)) ");
620

    
621
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
622
                + " FROM RelTaxon rel "
623
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
624
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
625
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk = 101 "
626
                + " ORDER BY t1.IdInSource");
627
        int i = 0;
628
        while (srcRS.next() && destRS.next()){
629
            success &= testSingleTaxonRelation(srcRS, destRS);
630
            i++;
631
        }
632
        success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
633
        return success;
634
    }
635

    
636
    private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
637
        String id = String.valueOf(srcRS.getInt("tid"));
638
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("tid"), destRS.getString("t1Id"), id);
639
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("pid"), destRS.getString("t2Id"), id);
640
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_TAXONOMICALLY_INCLUDED_IN, destRS.getInt("RelTaxonQualifierFk"), id);
641
        success &= equals("Taxon relation qualifier cache", "is taxonomically included in", destRS.getString("RelQualifierCache"), id);
642
        //TODO enable after next import
643
        success &= isNull("notes", destRS);
644
        //complete if no further relations need to added
645
        return success;
646
    }
647

    
648
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
649
        boolean success = true;
650
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
651
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
652
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
653
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
654
                + " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
655
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
656
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
657
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
658
                + "    LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
659
                + " WHERE t."+origEuroMed
660
                + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
661
        int count = 0;
662
        while (srcRs.next() && destRs.next()){
663
            success &= testSingleAdditionalTaxonSource(srcRs, destRs);
664
            count++;
665
        }
666
        success &= equals("Notes count differs", n, count, "-1");
667
        return success;
668
    }
669

    
670
    private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
671
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
672
        boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
673
        success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id);  //currently we use the same id in ERMS and PESI
674
        success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
675
        success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
676
        //TODO some records are still truncated ~ >820 characters
677
        success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
678
        success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
679
        //Complete
680
        return success;
681
    }
682

    
683
    private boolean testSingleNotes(int n) throws SQLException {
684
        boolean success = true;
685
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
686
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
687
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
688
                + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
689
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
690
                + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
691
                + "    LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
692
                + "    LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
693
                + " WHERE t." + origEuroMed
694
                + "      AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
695
                + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1  ");
696
        int count = 0;
697
        while (srcRs.next() && destRs.next()){
698
            success &= testSingleNote(srcRs, destRs);
699
            count++;
700
        }
701
        success &= equals("Notes count differs", n, count, "-1");
702
        return success;
703
    }
704

    
705
    private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
706
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
707
        boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
708
        success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
709
        success &= isNull("Note_2", destRs);
710
        success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
711
        success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
712
        success &= isNull("Region", destRs);
713
        success &= isNull("SpeciesExpertGUID", destRs);
714
        //SpeciesExpertName, LastAction, LastActionDate handled in separate method
715
        //complete
716
        return success;
717
    }
718

    
719
    private String normalizeNoteCatCache(String string) {
720
        return StringUtils.capitalize(string)
721
                .replace("Original Combination", "Original combination")
722
                .replace("Taxonomic remark", "Taxonomic Remark");
723
    }
724

    
725
    private boolean testSingleDistributions(int n) throws SQLException {
726
        boolean success = true;
727
        ResultSet srcRs = source.getResultSet(
728
                  " SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
729
                + "        CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
730
                + "        CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
731
                + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
732
                + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
733
                + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
734
                + "    LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
735
                + distributionCountWhere
736
                + " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
737
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
738
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
739
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
740
                + " WHERE t." + origEuroMed
741
                + " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
742
        int count = 0;
743
        while (srcRs.next() && destRs.next()){
744
            success &= testSingleDistribution(srcRs, destRs);
745
            count++;
746
        }
747
        success &= equals("Distribution count differs", n, count, "-1");
748
        return success;
749
    }
750

    
751
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
752
        String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
753
        boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
754
        success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
755
//        success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
756
        success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
757
        success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
758
//TODO        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
759
        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
760
        success &= isNull("SourceCache", destRs);  //sources should be moved to extra table, see above
761
//TODO        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
762
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in EM and according to script
763
        success &= isNull("SpeciesExpertName", destRs);  //SpeciesExpertName does not exist in EM and according to script
764
        success &= equals("Distribution Last Action", srcRs.getString("lastAction"),  destRs.getString("LastAction"), id);
765
        success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"),  destRs.getTimestamp("LastActionDate"), id);
766
        return success;
767
    }
768

    
769
    /**
770
     * @param string
771
     * @return
772
     */
773
    private Integer mapStatus(String uuidStr) {
774
        UUID uuid = UUID.fromString(uuidStr);
775
        if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){  //native, reported in error
776
            return PesiTransformer.STATUS_ABSENT;
777
        }else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducesAdventitious)  //casual, introduced adventitious
778
                || uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
779
                || uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
780
            return PesiTransformer.STATUS_INTRODUCED;
781
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){  //native
782
            return PesiTransformer.STATUS_NATIVE;
783
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){  //naturalised
784
            return PesiTransformer.STATUS_NATURALISED;
785
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){  //native, presence questionable
786
            return PesiTransformer.STATUS_DOUBTFUL;
787
        }else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){  //cultivated
788
            return PesiTransformer.STATUS_MANAGED;
789
        }else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){  //native, reported in error
790
            return -1;
791
        }
792

    
793
        return null;
794
    }
795

    
796
    private String normalizeDistrArea(String area) {
797
        if (area == null){
798
            return null;
799
        }else if ("France".equals(area)){return "French mainland";
800
        }else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
801
        }else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
802
        }else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
803
        }else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
804
        }else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
805
        }else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
806
        }else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
807
        }else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
808
        }else if ("Canary Is.".equals(area)){return "Canary Islands";
809
        }else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
810
        }else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
811
        }else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
812
        }else if ("Portugal".equals(area)){return "Portuguese mainland";
813
        }else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
814
        }else if ("Norway".equals(area)){return "Norwegian mainland";
815
        }else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
816
        }else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
817
        }else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
818
        }else if ("Ireland".equals(area)){return "Republic of Ireland";
819
        }else if ("France".equals(area)){return "French mainland";
820
        }
821
        return area;
822
    }
823

    
824
    private boolean testSingleCommonNames(int n) throws SQLException {
825
        boolean success = true;
826
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
827
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
828
                + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
829
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
830
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
831
                + " WHERE t." + origEuroMed
832
                + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate ");  //sorting also lastActionDate results in a minimum of exact duplicate problems
833
        int count = 0;
834
        while (srcRs.next() && destRs.next()){
835
            success &= testSingleCommonName(srcRs, destRs);
836
            count++;
837
        }
838
        success &= equals("Common name count differs", n, count, "-1");
839
        return success;
840
    }
841

    
842
    boolean prefer639_3 = true;
843
    String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
844
    String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
845

    
846
    private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
847
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
848
        boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
849
        success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
850
        success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
851
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
852
        //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
853
        success &= isNull("Region", destRs);  //region does not seem to exist in ERMS
854
        //TODO see comments
855
//        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table, check with PESI 2014
856
//        success &= isNull("SourceNameCache", destRs);  //sources should be moved to extra table, check with PESI 2014
857
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in ERMS
858
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
859
        //complete
860
        return success;
861
    }
862

    
863
    private String normalizeLang(String string) {
864
        if ("Spanish".equals(string)){
865
            return "Spanish, Castillian";
866
        }else if ("Modern Greek (1453-)".equals(string)){
867
            return "Greek";
868
        }else if ("Malay (individual language)".equals(string)){
869
            return "Malay";
870
        }else if ("Swahili (individual language)".equals(string)){
871
            return "Swahili";
872
        }
873

    
874
        return string;
875
    }
876

    
877
    private String getLanguageIso(ResultSet destRs) throws SQLException {
878
        String result = destRs.getString(preferredISO639);
879
        if (result == null){
880
            result = destRs.getString(alternativeISO639);
881
        }
882
        return result;
883
    }
884

    
885
    private boolean testSingleReferences(int count) throws SQLException {
886
        boolean success = true;
887
        ResultSet srcRS = source.getResultSet("SELECT r.*, a.titleCache author "
888
                + " FROM Reference r LEFT OUTER JOIN AgentBase a ON r.authorship_id = a.id "
889
                + " ORDER BY r.id ");
890
        ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
891
                + " WHERE s." + origEuroMed
892
                + " ORDER BY s.RefIdInSource ");  // +1 for the source reference "erms" but this has no OriginalDB
893
        int i = 0;
894
        while (srcRS.next() && destRS.next()){
895
            success &= testSingleReference(srcRS, destRS);
896
            i++;
897
        }
898
        success &= equals("References count differs", count, i, "-1");
899
        return success;
900
    }
901

    
902
    private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
903
        String id = String.valueOf(srcRS.getInt("id"));
904
        boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
905
        success &= isNull("IMIS_Id", destRS);  //for E+M no IMIS id exists
906
        success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("refType")), destRS.getInt("SourceCategoryFk"), id);
907
        success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("refType")), destRS.getString("SourceCategoryCache"), id);
908
        success &= equals("Reference name ", srcRS.getString("titleCache"), destRS.getString("Name"), id);
909
        success &= equals("Reference abstract ", srcRS.getString("referenceAbstract"), destRS.getString("Abstract"), id);
910
        success &= equals("Reference title ", srcRS.getString("title"), destRS.getString("Title"), id);
911
        success &= equals("Reference author string ", srcRS.getString("author"), destRS.getString("AuthorString"), id);
912
        //TODO
913
        success &= equals("Reference year ", normalizeYear(srcRS), destRS.getString("RefYear"), id);
914
        //FIXME
915
//        success &= equals("Reference NomRefCache ", srcRS.getString("abbrevTitleCache"), destRS.getString("NomRefCache"), id);
916
        success &= equals("Reference DOI ", srcRS.getString("doi"), destRS.getString("Doi"), id);
917
        success &= equals("Reference link ", srcRS.getString("uri"), destRS.getString("Link"), id);
918
        //TODO Notes
919
//        success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
920
        //complete
921
        return success;
922
    }
923

    
924
    private Integer convertSourceTypeFk(String sourceType) {
925
        if (sourceType == null){
926
            return null;
927
        }else if ("DB".equals(sourceType)){
928
            return PesiTransformer.REF_DATABASE;
929
        }else if ("JOU".equals(sourceType)){
930
            return PesiTransformer.REF_JOURNAL;
931
        }else if ("BK".equals(sourceType)){
932
            return PesiTransformer.REF_BOOK;
933
        }else if ("GEN".equals(sourceType)){
934
            return PesiTransformer.REF_UNRESOLVED;
935
        }else if ("SER".equals(sourceType)){
936
//            TODO correct?
937
            return PesiTransformer.REF_UNRESOLVED;
938
        }else if ("i".equals(sourceType)){
939
            return 12;
940
        }
941
        return null;
942
    }
943
    private String convertSourceTypeCache(String sourceType) {
944
        if (sourceType == null){
945
            return null;
946
        }else if ("DB".equals(sourceType)){
947
            return "database";
948
        }else if ("JOU".equals(sourceType)){
949
            return "journal";
950
        }else if ("BK".equals(sourceType)){
951
            return "book";
952
        }else if ("SER".equals(sourceType)){
953
            return "published";
954
        }else if ("BK".equals(sourceType)){
955
            return "book";
956
        }else if ("GEN".equals(sourceType)){
957
            return "unresolved";
958
        }else if ("i".equals(sourceType)){
959
            //TODO
960
            return "i";
961
        }
962
        return null;
963
    }
964

    
965
    private boolean testReferenceCount() {
966
        int countSrc = source.getUniqueInteger(countReferencesStr);
967
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origEuroMed);  // +1 for the source reference "erms" but this has no OriginalDB
968
        boolean success = equals("Reference count ", countSrc, countDest, "-1");
969
        return success;
970
    }
971

    
972
    private String normalizeYear(ResultSet rs) throws SQLException {
973
        String freetext = rs.getString("datePublished_freetext");
974
        if(StringUtils.isNotBlank(freetext)){
975
            return freetext;
976
        }
977
        String start = rs.getString("datePublished_start");
978
        String end = rs.getString("datePublished_end");
979
        if (start != null){
980
            start = start.substring(0,4);
981
        }
982
        if (end != null){
983
            end = end.substring(0,4);
984
        }
985
        String result = start == null? null: start + (end==null? "": "-"+ end);
986
        return result;
987
    }
988

    
989
    private boolean isNull(String attrName, ResultSet destRS) throws SQLException {
990
        Object value = destRS.getObject(attrName);
991
        if (value != null){
992
            String message = attrName + " was expected to be null but was: " + value.toString();
993
            logger.warn(message);
994
            return false;
995
        }else{
996
            logger.info(attrName + " was null as expected");
997
            return true;
998
        }
999
    }
1000

    
1001
    private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
1002
        if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
1003
            LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
1004
            LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
1005
            if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
1006
                logger.info(messageStart + " were (almost) equal: " + srcDate);
1007
                return true;
1008
            }else{
1009
                String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
1010
                logger.warn(message);
1011
                return false;
1012
            }
1013
        }else{
1014
            logger.info(messageStart + " were equal: " + srcDate);
1015
            return true;
1016
        }
1017
    }
1018

    
1019
    private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
1020
        String strId = id.equals("-1")? "": (id+ ": ");
1021
        if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
1022
            String message = strId+ messageStart + " must be equal, but was not.\n Source: "+  nSrc + "; Destination: " + nDest;
1023
            logger.warn(message);
1024
            return false;
1025
        }else{
1026
            logger.info(strId + messageStart + " were equal: " + nSrc);
1027
            return true;
1028
        }
1029
    }
1030

    
1031
    private boolean equals(String messageStart, String strSrc, String strDest, String id) {
1032
        if (StringUtils.isBlank(strSrc)){
1033
            strSrc = null;
1034
        }else{
1035
            strSrc = strSrc.trim();
1036
        }
1037
        //we do not trim strDest here because this should be done during import already. If not it should be shown here
1038
        if (!CdmUtils.nullSafeEqual(strSrc, strDest)){
1039
            int index = CdmUtils.diffIndex(strSrc, strDest);
1040
            String message = id+ ": " + messageStart + " must be equal, but was not at "+index+".\n  Source:      "+  strSrc + "\n  Destination: " + strDest;
1041
            logger.warn(message);
1042
            return false;
1043
        }else{
1044
            logger.info(id+ ": " + messageStart + " were equal: " + strSrc);
1045
            return true;
1046
        }
1047
    }
1048

    
1049
    protected Integer nullSafeInt(ResultSet rs, String columnName) throws SQLException {
1050
        Object intObject = rs.getObject(columnName);
1051
        if (intObject == null){
1052
            return null;
1053
        }else{
1054
            return Integer.valueOf(intObject.toString());
1055
        }
1056
    }
1057

    
1058
    private boolean isNotBlank(String str) {
1059
        return StringUtils.isNotBlank(str);
1060
    }
1061

    
1062
//** ************* MAIN ********************************************/
1063

    
1064

    
1065

    
1066
    public static void main(String[] args){
1067
        PesiEuroMedValidator validator = new PesiEuroMedValidator();
1068
        validator.invoke(new Source(defaultSource), defaultDestination);
1069
        System.exit(0);
1070
    }
1071
}
(2-2/2)