Project

General

Profile

Download (55.6 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.pesi.validate;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.sql.Timestamp;
14
import java.time.LocalDate;
15
import java.util.UUID;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.app.common.CdmDestinations;
21
import eu.etaxonomy.cdm.app.common.PesiDestinations;
22
import eu.etaxonomy.cdm.app.pesi.EuroMedSourceActivator;
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.UTF8;
25
import eu.etaxonomy.cdm.database.ICdmDataSource;
26
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
27
import eu.etaxonomy.cdm.io.common.Source;
28
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
29
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
30
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
31

    
32
/**
33
 * Tests the E+M -> PESI pipeline by comparing the source DB with destination PESI DB.
34
 *
35
 * @author a.mueller
36
 * @since 08.10.2019
37
 */
38
public class PesiEuroMedValidator extends PesiValidatorBase {
39

    
40
    private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
41

    
42
    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
43
//    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_pesi2019_final();
44
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
45
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
46

    
47
    boolean doReferences = false;
48
    boolean doTaxa = true;
49
    boolean doTaxRels = false;
50
    boolean doDistributions = false;
51
    boolean doCommonNames = false;
52
    boolean doNotes = false;
53
    boolean doAdditionalTaxonSources = false;
54

    
55
    private Source source = new Source(defaultSource);
56
    private Source destination = defaultDestination;
57

    
58
    private String origEuroMed = "OriginalDB = 'E+M' ";
59

    
60
    public void invoke(Source source, Source destination){
61
        logger.warn("Validate destination " +  destination.getDatabase());
62
        boolean success = true;
63
        try {
64
            this.source = source;
65
            this.destination = destination;
66
            success &= testReferences();
67
            success &= testTaxa();
68
            success &= testTaxonRelations();
69
            success &= testDistributions();
70
            success &= testCommonNames();
71
            success &= testNotes();
72
            success &= testAdditionalTaxonSources();
73
        } catch (Exception e) {
74
            e.printStackTrace();
75
            success = false;
76
        }
77
        //TBC
78
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
79
    }
80

    
81
    private boolean testAdditionalTaxonSources() throws SQLException {
82
        if (!doAdditionalTaxonSources){
83
            System.out.println("Ignore validate additional taxon sources");
84
            return true;
85
        }
86
        System.out.println("Start validate additional taxon sources");
87
        boolean success = testAdditionalTaxonSourcesCount();
88
        if (success){
89
              success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
90
        }
91
        return success;
92
    }
93

    
94
    private boolean testNotes() throws SQLException {
95
        if (!doNotes){
96
            System.out.println("Ignore validate notes");
97
            return true;
98
        }
99
        System.out.println("Start validate notes");
100
        boolean success = testNotesCount();
101
        if (success){
102
              success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
103
        }
104
        return success;
105
    }
106

    
107
    private boolean testDistributions() throws SQLException {
108
        if (!doDistributions){
109
            System.out.println("Ignore validate distributions");
110
            return true;
111
        }
112
        System.out.println("Start validate distributions");
113
        boolean success = testDistributionCount();
114
        if (!success){
115
              success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
116
        }
117
        return success;
118
    }
119

    
120
    private boolean testCommonNames() throws SQLException {
121
        if (!doCommonNames){
122
            System.out.println("Ignore validate common names");
123
            return true;
124
        }
125
        System.out.println("Start validate common names");
126
        boolean success = testCommonNameCount();
127
        if (success){
128
            success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
129
        }
130
        return success;
131
    }
132

    
133
    int countSynonyms;
134
    int countIncludedIns;
135
    private boolean testTaxonRelations() throws SQLException {
136
        if (!doTaxRels){
137
            System.out.println("Ignore validate taxon relations");
138
            return true;
139
        }
140
        System.out.println("Start validate taxon relations");
141
        boolean success = testSynonymRelations();
142
        success &= testIncludedInRelations();
143
        success &= testTotalRelations();
144
        success &= testNameRelations();
145
        return success;
146
    }
147

    
148
    private boolean testTotalRelations() {
149
        if (!(countSynonyms < 0 || countIncludedIns < 0)){
150
            int countTotalSrc = countSynonyms + countIncludedIns;
151
            int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
152
            boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
153
            int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origEuroMed);
154
            success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
155
            return success;
156
        }else{
157
            return false;
158
        }
159
    }
160

    
161
    private final String countSynonymRelation = "SELECT count(*) FROM TaxonBase syn LEFT JOIN TaxonBase acc ON syn.acceptedTaxon_id = acc.id WHERE syn.publish = 1 AND acc.publish = 1 ";
162
    private boolean testSynonymRelations() throws SQLException {
163

    
164
        int countSrc = source.getUniqueInteger(countSynonymRelation);
165
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
166
        boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
167
        if (success){
168
            //TODO test single synonym relations
169
            success &= testSingleSynonymRelations(source.getUniqueInteger(countSynonymRelation));
170
        }
171
        countSynonyms = (countSrc == countDest)? countSrc : -1;
172
        return success;
173
    }
174

    
175
    private boolean testSingleSynonymRelations(int n) throws SQLException {
176
        boolean success = true;
177
        ResultSet srcRS = source.getResultSet(""
178
                + " SELECT t.id tid, pt.id pid "
179
                + " FROM TaxonNode tn "
180
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
181
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
182
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
183
                + " WHERE t.publish = 1 && pt.publish = 1 "
184
                + " ORDER BY CAST(tb.id as char(20)) ");
185

    
186
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
187
                + " FROM RelTaxon rel "
188
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
189
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
190
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk > 101 "
191
                + " ORDER BY t1.IdInSource");
192
        int i = 0;
193
        while (srcRS.next() && destRS.next()){
194
            success &= testSingleSynonymRelation(srcRS, destRS);
195
            i++;
196
        }
197
        success &= equals("Synonym relation count for single compare", n, i, String.valueOf(-1));
198
        return success;
199
    }
200

    
201
    private boolean testSingleSynonymRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
202
        String id = String.valueOf(srcRS.getInt("id"));
203
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
204
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
205
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
206
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
207
        //TODO enable after next import
208
//        success &= isNull("notes", destRS);
209
        //complete if no further relations need to added
210
        return success;
211
    }
212

    
213
    private boolean testNameRelations() {
214
        //Name relations
215
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM NameRelationship WHERE ("
216
               + " 1=1 "
217
                 + ")");
218
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
219
        boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
220
        if (success){
221
            //TODO test single name relation
222
//            success &= testSingleNameRelations(source.getUniqueInteger(countSynonymRelation));
223
        }
224
        return success;
225
    }
226

    
227
    private final String countParentRelation  = "SELECT count(*) "
228
            + " FROM TaxonNode tn "
229
            + " INNER JOIN TaxonBase tb ON tn.taxon_id = tb.id "
230
            + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
231
            + "   LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
232
            + " WHERE tb.publish = 1 && pt.publish = 1  ";
233

    
234
    private boolean testIncludedInRelations() throws SQLException {
235
        int countSrc = source.getUniqueInteger(countParentRelation);
236
        int  countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
237
        boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
238
        if (success){
239
            success &= testSingleTaxonRelations(source.getUniqueInteger(countParentRelation));
240
        }
241
        countIncludedIns = (countSrc == countDest)? countSrc : -1;
242
        return success;
243
    }
244

    
245
    private boolean testTaxa() throws SQLException {
246
        if (!doTaxa){
247
            System.out.println("Ignore validate taxa");
248
            return true;
249
        }
250
        System.out.println("Start validate taxa");
251
        boolean success = testTaxaCount();
252
        if (success){
253
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
254
        }
255
        return success;
256
    }
257

    
258
    String countReferencesStr = "SELECT count(*) FROM reference ";
259
    private boolean testReferences() throws SQLException {
260
        if (!doReferences){
261
            System.out.println("Ignore validate references");
262
            return true;
263
        }
264
        System.out.println("Start validate references");
265
        boolean success = testReferenceCount();
266
        if (success){
267
            success &= testSingleReferences(source.getUniqueInteger(countReferencesStr));
268
        }
269
        return success;
270
    }
271

    
272
    private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts ";
273
    private boolean testAdditionalTaxonSourcesCount() {
274
        int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
275
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
276
        return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
277
    }
278

    
279
    private boolean testNotesCount() {
280
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
281
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
282
                + " WHERE (1=1) ");
283
        boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
284

    
285
        return result;
286
    }
287

    
288
    private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
289
            + "'111bdf38-7a32-440a-9808-8af1c9e54b51',"   //E+M
290
            //Former UUSR
291
            + "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
292
            + "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
293
            + "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
294
    private String distributionCountSQL = "SELECT count(*) as n "
295
            + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
296
            + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
297
            + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
298
            + distributionCountWhere;
299
    private boolean testDistributionCount() {
300
        int countSrc = source.getUniqueInteger(distributionCountSQL);
301
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
302
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
303
    }
304

    
305
    private boolean testCommonNameCount() {
306
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
307
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
308
        return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
309
    }
310

    
311
    private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
312
    private final String destTaxonFilter = "(t.SourceFk IS NOT NULL OR t.AuthorString like 'auct.%' OR t.AuthorString like 'sensu %')";
313
    private boolean testTaxaCount() {
314
         int countSrc = source.getUniqueInteger(countTaxon);
315
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE "+ destTaxonFilter);
316
         boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
317
         return result;
318
    }
319

    
320
    private boolean testSingleTaxa(int n) throws SQLException {
321
        boolean success = true;
322
        ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, pt.id parentId, "
323
                + "      tn.rank_id, rank.titleCache rank_name, "
324
                + "      sec.titleCache secTitle, secAut.titleCache secAutTitle, "
325
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
326
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, tn.fullTitleCache nameFullTitleCache, "
327
                + "      tb.DTYPE taxStatus, tb.titleCache, tb.appendedPhrase tbAppendedPhrase, tb.sec_id secId, "
328
                + "      taxRelType.uuid taxRelTypeUuid, tr.relatedTo_id relToTaxonId, "
329
                + "      nsType.id nsId, nsType.idInVocabulary nsTitle, "
330
                + "      typeName_id, typeName.titleCache typeFullNameCache, "
331
                + "      CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
332
                + "      CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
333
                + " FROM TaxonBase tb "
334
                + "     LEFT JOIN TaxonName tn on tb.name_id = tn.id "
335
                + "     LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
336
                + "     LEFT JOIN Reference sec ON sec.id = tb.sec_id "
337
                + "     LEFT JOIN AgentBase secAut ON secAut.id = sec.authorship_id "
338
                + "     LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
339
                + "     LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
340
                + "     LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
341
                + "     LEFT JOIN TaxonName_TypeDesignationBase typeMN ON typeMN.TaxonName_id = tn.id "
342
                + "     LEFT JOIN TypeDesignationBase td ON td.id = typeMN.typedesignations_id "
343
                + "     LEFT JOIN TaxonName typeName ON typeName.id = td.typeName_id "
344
                + "     LEFT JOIN TaxonNode n ON n.taxon_id = tb.id "
345
                + "     LEFT JOIN TaxonNode ptn ON n.parent_id = ptn.id "
346
                + "     LEFT JOIN TaxonBase pt ON pt.id = ptn.taxon_id AND pt.publish = 1 "
347
                + "     LEFT JOIN TaxonRelationship tr ON tr.relatedFrom_id = tb.id "
348
                + "     LEFT JOIN TaxonBase tbRelTo ON tr.relatedTo_id = tbRelTo.id "
349
                + "     LEFT JOIN DefinedTermBase taxRelType ON taxRelType.id = tr.type_id"
350
                + " WHERE tb.publish = 1 "
351
                + " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
352
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
353
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache, "
354
                + "      tb.DTYPE, tb.updated, tb.created "    //for duplicates caused by >1 name status
355
                + " ORDER BY tid, GUID, lastActionDate ");
356
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
357
                + "     pt.treeIndex pTreeIndex, pt.IdInSource parentSourceId, "  //not needed
358
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
359
                + " FROM Taxon t "
360
                + "    LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
361
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
362
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
363
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
364
                + " WHERE t."+ origEuroMed + " AND " + destTaxonFilter   //FIXME remove SourceFk filter is only preliminary for first check
365
                + " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
366
        int i = 0;
367
        logger.error("remove SourceFk filter is only preliminary for first check");
368
        while (srcRS.next() && destRS.next()){
369
            success &= testSingleTaxon(srcRS, destRS);
370
            i++;
371
        }
372
        success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
373
        return success;
374
    }
375

    
376
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
377
        String id = String.valueOf(srcRS.getInt("tid"));
378
        //TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
379
        boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
380
        success &= equals("Taxon source", makeSource(srcRS), destRS.getString("sourceName"), id);
381

    
382
        success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
383
//difficult to test        success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
384
        success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
385
        success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
386
        success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
387
        success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
388
        success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
389

    
390
        success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
391
//TODO     success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
392
        success &= equals("Taxon authority", makeAuthorship(srcRS), destRS.getString("AuthorString"), id);
393
        success &= equals("Taxon FullName", makeFullName(srcRS), destRS.getString("FullName"), id);
394
        success &= equals("Taxon NomRefString", makeNomRefString(srcRS), destRS.getString("NomRefString"), id);
395
//      success &= equals("Taxon DisplayName", makeDisplayName(srcRS), destRS.getString("DisplayName"), id);  //in ERMS according to SQL script same as FullName, no nom.ref. information attached
396
//difficult to test   success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
397
        success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
398

    
399
        //TODO mostly Taxonomically Valueless
400
//        success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), nullSafeInt( destRS,"TaxonStatusFk"), id);
401
//        success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), destRS.getString("TaxonStatusCache"), id);
402

    
403
        success &= equals("Taxon ParentTaxonFk", nullSafeInt(srcRS, "parentId"), nullSafeInt(destRS, "ParentTaxonFk"), id);
404

    
405
        Integer origTypeNameFk = nullSafeInt(srcRS, "typeName_id");
406
        success &= equals("Taxon TypeNameFk", origTypeNameFk == null? null : "NameId: " + origTypeNameFk, destRS.getString("typeSourceId"), id);
407
        success &= equals("Taxon TypeFullNameCache", srcRS.getString("typeFullNameCache"), destRS.getString("TypeFullNameCache"), id);
408
        //according to SQL always constant, could be changed in future
409
        success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
410
        success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
411
        success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
412
        success &= isNull("FossilStatusFk", destRS, id);
413
        success &= isNull("FossilStatusCache", destRS, id);
414
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
415
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
416
        success &= isNull("ExpertGUID", destRS, id);  //according to SQL + PESI2014
417
        success &= isNull("SpeciesExpertGUID", destRS, id);
418
        //ExpertName = SpeciesExpertName in E+M according to SQL script, 4689x NULL
419
        success &= equals("Taxon ExpertName", makeExpertName(srcRS), destRS.getString("ExpertName"), id);
420
        success &= equals("Taxon SpeciesExpertName", makeExpertName(srcRS), destRS.getString("SpeciesExpertName"), id);
421
//FIXME !!        success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
422
        success &= equals("Taxon Last Action", srcRS.getString("lastAction"),  destRS.getString("LastAction"), id);
423
        success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"),  destRS.getTimestamp("LastActionDate"), id);
424

    
425
        success &= isNull("GUID2", destRS, id);  //only relevant after merge
426
        success &= isNull("DerivedFromGuid2", destRS, id);  //only relevant after merge
427
        return success;
428
    }
429

    
430
    private String makeExpertName(ResultSet srcRs) throws SQLException {
431
        String autStr = srcRs.getString("secAutTitle");
432
        if (autStr != null){
433
            return autStr;
434
        }else{
435
            return srcRs.getString("secTitle");
436
        }
437
    }
438

    
439
    private String makeSource(ResultSet srcRs) throws SQLException {
440
        String secStr = srcRs.getString("secTitle");
441
        if (secStr == null){
442
            return EuroMedSourceActivator.sourceReferenceTitle;
443
        }else{
444
            return secStr;
445
        }
446
    }
447

    
448
    private String makeAuthorship(ResultSet srcRs) throws SQLException {
449
        boolean isMisapplied = isMisapplied(srcRs);
450
        if (isMisapplied){
451
            String result = getMisappliedAuthor(srcRs).trim();
452
            return result;
453
        }else{
454
            return srcRs.getString("authorshipCache");
455
        }
456
    }
457

    
458
    private String makeFullName(ResultSet srcRs) throws SQLException {
459
        boolean isMisapplied = isMisapplied(srcRs);
460
        if (isMisapplied){
461
            String result = srcRs.getString("nameCache");
462
            result += getMisappliedAuthor(srcRs);
463
            return result;
464
        }else{
465
            return srcRs.getString("nameTitleCache");
466
        }
467
    }
468

    
469
    private String makeNomRefString(ResultSet srcRS) throws SQLException {
470
        //there is no pure nomRefString field in CDM and also computing is only possible
471
        //with cache strategy which requires a running CDM instance. So this is a workaround
472
        //that maybe needs to be adapted
473
        String result = null;
474
        String fullTitle = srcRS.getString("nameFullTitleCache");
475
        String nameTitleCache = srcRS.getString("nameTitleCache");
476
        String nameStatus = CdmUtils.Nz(srcRS.getString("nsTitle"));
477
        if (fullTitle != null && nameTitleCache != null){
478
            result = fullTitle.substring(nameTitleCache.length())
479
                    .replaceAll("^, ", "")
480
                    .replaceAll("(, |^)"+nameStatus+"$", "")
481
                    .replaceAll("\\[as \".*\"\\]", "")
482
                    .replaceAll(", nom\\. cons\\., nom\\. altern\\.$", "")  //single case with 2 nom. status
483
                    .trim();
484
        }
485
        return result;
486
    }
487

    
488
    private String mapTaxStatus(String dtype, String taxRelTypeUuidStr) {
489
        Integer statusFk = mapTaxStatusFk(dtype, taxRelTypeUuidStr);
490
        if (statusFk == null){
491
            return null;
492
        }else if (statusFk == PesiTransformer.T_STATUS_ACCEPTED){
493
            return "accepted";
494
        }else if (statusFk == PesiTransformer.T_STATUS_SYNONYM){
495
            return "synonym";
496
        }else if (statusFk == PesiTransformer.T_STATUS_PRO_PARTE_SYN){
497
            return "pro parte synonym";
498
        }else if (statusFk == PesiTransformer.T_STATUS_PARTIAL_SYN){
499
            return "partial synonym";
500
        }
501
        return null;
502
    }
503

    
504
    private Integer mapTaxStatusFk(String dtype, String taxRelTypeUuidStr) {
505
        if (dtype == null){
506
            return null;
507
        }else if ("Synonym".equals(dtype)){
508
            return PesiTransformer.T_STATUS_SYNONYM;
509
        }else if ("Taxon".equals(dtype)){
510
            UUID relTypeUuid = taxRelTypeUuidStr == null? null: UUID.fromString(taxRelTypeUuidStr);
511
            if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
512
                return PesiTransformer.T_STATUS_PRO_PARTE_SYN;
513
            }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
514
                return PesiTransformer.T_STATUS_PARTIAL_SYN;
515
            }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
516
                return PesiTransformer.T_STATUS_SYNONYM;  //no explicit MAN status exists in PESI
517
            }else{
518
                return PesiTransformer.T_STATUS_ACCEPTED;
519
            }
520
        }
521
        return null;
522
    }
523

    
524
    private String normalizeRank(String rankStr) {
525
        if (rankStr == null){return null;
526
        }else if (rankStr.equals("Convar")){return "Convariety";
527
        }else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
528
        }else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
529
        }else if (rankStr.equals("Coll. species")){return "Coll. Species";
530
        }else if (rankStr.equals("Species Aggregate")){return "Aggregate";
531
        }else if (rankStr.equals("Subsection bot.")){return "Subsection";
532
        }return rankStr;
533
    }
534

    
535
    private String makeDisplayName(ResultSet srcRs) throws SQLException {
536
        boolean isMisapplied = isMisapplied(srcRs);
537

    
538
        String result;
539
        String nameTitle = srcRs.getString("nameTitleCache");
540
        String nameCache = srcRs.getString("nameCache");
541
        if(!isMisapplied){
542
            result = srcRs.getString("nameFullTitleCache");
543
            String taggedName = getTaggedNameTitle(nameCache, nameTitle);
544
            result = result.replace(nameTitle, taggedName);
545
            result = result.replaceAll("^<i>"+ UTF8.HYBRID , UTF8.HYBRID+ "<i>").replaceAll(" "+ UTF8.HYBRID, "</i> "+UTF8.HYBRID+"<i>");
546
        }else{
547
            result = srcRs.getString("nameCache");
548
            String taggedName = getTaggedNameTitle(nameCache, nameCache);
549
            result = result.replace(nameCache, taggedName);
550
            //misapplied
551
            result += getMisappliedAuthor(srcRs);
552
        }
553
        String nameStatus = CdmUtils.Nz(srcRs.getString("nsTitle"));
554
        result = result.replaceAll("(, |^)"+nameStatus+"$", "");
555
        return result;
556
    }
557

    
558
    private boolean isMisapplied(ResultSet srcRs) throws SQLException {
559
        String relTypeUuid = srcRs.getString("taxRelTypeUuid");
560
        boolean isMisapplied = relTypeUuid!=null
561
                && (relTypeUuid.equals(TaxonRelationshipType.uuidMisappliedNameFor.toString())
562
                   || relTypeUuid.equals(TaxonRelationshipType.uuidProParteMisappliedNameFor.toString())
563
                   || relTypeUuid.equals(TaxonRelationshipType.uuidPartialMisappliedNameFor.toString()))
564
                //TODO formatting of ppMANs not yet implemented
565
                && nullSafeInt(srcRs, "relToTaxonId") != null;
566
        return isMisapplied;
567
    }
568

    
569
    private String getMisappliedAuthor(ResultSet srcRs) throws SQLException {
570
        String result;
571
        String relAppendedPhrase = srcRs.getString("tbAppendedPhrase");
572
        String secId = srcRs.getString("secId");
573
        String secTitle = srcRs.getString("secTitle");
574
        if(relAppendedPhrase == null && secId == null) {
575
            result = " auct.";
576
        }else if (relAppendedPhrase != null && secId == null){
577
            result = " " + relAppendedPhrase;
578
        }else if (relAppendedPhrase == null && secId != null){
579
            result = " sensu " + secTitle;
580
        }else{
581
            result = " " + relAppendedPhrase + " " + secTitle;
582
        }
583
        String authorship = srcRs.getString("authorshipCache");
584
        if (isNotBlank(authorship)){
585
            result += ", non " + authorship;
586
        }
587
        return result;
588
    }
589

    
590
    private String getTaggedNameTitle(String nameCache, String nameTitle) {
591
        if (nameCache == null){
592
            logger.warn("NameCache is null");
593
            return nameTitle;
594
        }
595
        String result = null;
596
        try {
597
            String[] nameCacheSplit = nameCache.split(" ");
598
            String[] nameTitleSplit = nameTitle.split(" ");
599
            result = "";
600
            boolean currentIsName = false;
601
            for (int i=0, j=0; j < nameTitleSplit.length; j++){
602
                if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
603
                        && !isMarker(nameCacheSplit[i])){
604
                    if(!currentIsName){
605
                        result += " <i>" + nameCacheSplit[i];
606
                        currentIsName = true;
607
                    }else{
608
                        result += " " + nameCacheSplit[i];
609
                    }
610
                    if((j+1)==nameTitleSplit.length){
611
                        result += "</i>";
612
                    }
613
                    i++;
614
                }else{
615
                    if(currentIsName){
616
                        result += "</i>";
617
                        currentIsName = false;
618
                    }
619
                    result += " " + nameTitleSplit[j];
620
                    if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
621
                            && isMarker(nameCacheSplit[i])){
622
                        i++;
623
                    }
624
                }
625
            }
626
            return result.trim();
627
        } catch (Exception e) {
628
            e.printStackTrace();
629
            return result;
630
        }
631
    }
632

    
633
    private boolean isMarker(String nameCacheSplit) {
634
        return nameCacheSplit.endsWith(".") || nameCacheSplit.equals("[unranked]")
635
                || nameCacheSplit.equals("grex")|| nameCacheSplit.equals("proles")
636
                || nameCacheSplit.equals("race");
637
    }
638

    
639
    private boolean testSingleTaxonRelations(int n) throws SQLException {
640
        boolean success = true;
641
        ResultSet srcRS = source.getResultSet(""
642
                + " SELECT t.name_id tid, pt.name_id pid "
643
                + " FROM TaxonNode tn "
644
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
645
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
646
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
647
                + " WHERE t.publish = 1 && pt.publish = 1 "
648
                + " ORDER BY CAST(t.name_id as char(20)) ");
649

    
650
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
651
                + " FROM RelTaxon rel "
652
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
653
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
654
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk = 101 "
655
                + " ORDER BY t1.IdInSource");
656
        int i = 0;
657
        while (srcRS.next() && destRS.next()){
658
            success &= testSingleTaxonRelation(srcRS, destRS);
659
            i++;
660
        }
661
        success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
662
        return success;
663
    }
664

    
665
    private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
666
        String id = String.valueOf(srcRS.getInt("tid"));
667
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("tid"), destRS.getString("t1Id"), id);
668
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("pid"), destRS.getString("t2Id"), id);
669
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_TAXONOMICALLY_INCLUDED_IN, destRS.getInt("RelTaxonQualifierFk"), id);
670
        success &= equals("Taxon relation qualifier cache", "is taxonomically included in", destRS.getString("RelQualifierCache"), id);
671
        //TODO enable after next import
672
        success &= isNull("notes", destRS, id);
673
        //complete if no further relations need to added
674
        return success;
675
    }
676

    
677
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
678
        boolean success = true;
679
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
680
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
681
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
682
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
683
                + " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
684
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
685
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
686
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
687
                + "    LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
688
                + " WHERE t."+origEuroMed
689
                + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
690
        int count = 0;
691
        while (srcRs.next() && destRs.next()){
692
            success &= testSingleAdditionalTaxonSource(srcRs, destRs);
693
            count++;
694
        }
695
        success &= equals("Notes count differs", n, count, "-1");
696
        return success;
697
    }
698

    
699
    private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
700
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
701
        boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
702
        success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id);  //currently we use the same id in ERMS and PESI
703
        success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
704
        success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
705
        //TODO some records are still truncated ~ >820 characters
706
        success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
707
        success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
708
        //Complete
709
        return success;
710
    }
711

    
712
    private boolean testSingleNotes(int n) throws SQLException {
713
        boolean success = true;
714
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
715
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
716
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
717
                + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
718
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
719
                + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
720
                + "    LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
721
                + "    LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
722
                + " WHERE t." + origEuroMed
723
                + "      AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
724
                + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1  ");
725
        int count = 0;
726
        while (srcRs.next() && destRs.next()){
727
            success &= testSingleNote(srcRs, destRs);
728
            count++;
729
        }
730
        success &= equals("Notes count differs", n, count, "-1");
731
        return success;
732
    }
733

    
734
    private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
735
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
736
        boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
737
        success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
738
        success &= isNull("Note_2", destRs, id);
739
        success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
740
        success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
741
        success &= isNull("Region", destRs, id);
742
        success &= isNull("SpeciesExpertGUID", destRs, id);
743
        //SpeciesExpertName, LastAction, LastActionDate handled in separate method
744
        //complete
745
        return success;
746
    }
747

    
748
    private String normalizeNoteCatCache(String string) {
749
        return StringUtils.capitalize(string)
750
                .replace("Original Combination", "Original combination")
751
                .replace("Taxonomic remark", "Taxonomic Remark");
752
    }
753

    
754
    private boolean testSingleDistributions(int n) throws SQLException {
755
        boolean success = true;
756
        ResultSet srcRs = source.getResultSet(
757
                  " SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
758
                + "        CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
759
                + "        CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
760
                + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
761
                + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
762
                + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
763
                + "    LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
764
                + distributionCountWhere
765
                + " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
766
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
767
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
768
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
769
                + " WHERE t." + origEuroMed
770
                + " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
771
        int count = 0;
772
        while (srcRs.next() && destRs.next()){
773
            success &= testSingleDistribution(srcRs, destRs);
774
            count++;
775
        }
776
        success &= equals("Distribution count differs", n, count, "-1");
777
        return success;
778
    }
779

    
780
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
781
        String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
782
        boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
783
        success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
784
//        success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
785
        success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
786
        success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
787
//TODO        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
788
        success &= isNull("SourceFk", destRs, id);  //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
789
        success &= isNull("SourceCache", destRs, id);  //sources should be moved to extra table, see above
790
//TODO        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
791
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in EM and according to script
792
        success &= isNull("SpeciesExpertName", destRs, id);  //SpeciesExpertName does not exist in EM and according to script
793
        success &= equals("Distribution Last Action", srcRs.getString("lastAction"),  destRs.getString("LastAction"), id);
794
        success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"),  destRs.getTimestamp("LastActionDate"), id);
795
        return success;
796
    }
797

    
798
    /**
799
     * @param string
800
     * @return
801
     */
802
    private Integer mapStatus(String uuidStr) {
803
        UUID uuid = UUID.fromString(uuidStr);
804
        if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){  //native, reported in error
805
            return PesiTransformer.STATUS_ABSENT;
806
        }else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducedAdventitious)  //casual, introduced adventitious
807
                || uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
808
                || uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
809
            return PesiTransformer.STATUS_INTRODUCED;
810
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){  //native
811
            return PesiTransformer.STATUS_NATIVE;
812
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){  //naturalised
813
            return PesiTransformer.STATUS_NATURALISED;
814
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){  //native, presence questionable
815
            return PesiTransformer.STATUS_DOUBTFUL;
816
        }else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){  //cultivated
817
            return PesiTransformer.STATUS_MANAGED;
818
        }else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){  //native, reported in error
819
            return -1;
820
        }
821

    
822
        return null;
823
    }
824

    
825
    private String normalizeDistrArea(String area) {
826
        if (area == null){
827
            return null;
828
        }else if ("France".equals(area)){return "French mainland";
829
        }else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
830
        }else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
831
        }else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
832
        }else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
833
        }else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
834
        }else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
835
        }else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
836
        }else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
837
        }else if ("Canary Is.".equals(area)){return "Canary Islands";
838
        }else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
839
        }else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
840
        }else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
841
        }else if ("Portugal".equals(area)){return "Portuguese mainland";
842
        }else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
843
        }else if ("Norway".equals(area)){return "Norwegian mainland";
844
        }else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
845
        }else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
846
        }else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
847
        }else if ("Ireland".equals(area)){return "Republic of Ireland";
848
        }else if ("France".equals(area)){return "French mainland";
849
        }
850
        return area;
851
    }
852

    
853
    private boolean testSingleCommonNames(int n) throws SQLException {
854
        boolean success = true;
855
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
856
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
857
                + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
858
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
859
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
860
                + " WHERE t." + origEuroMed
861
                + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate ");  //sorting also lastActionDate results in a minimum of exact duplicate problems
862
        int count = 0;
863
        while (srcRs.next() && destRs.next()){
864
            success &= testSingleCommonName(srcRs, destRs);
865
            count++;
866
        }
867
        success &= equals("Common name count differs", n, count, "-1");
868
        return success;
869
    }
870

    
871
    boolean prefer639_3 = true;
872
    String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
873
    String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
874

    
875
    private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
876
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
877
        boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
878
        success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
879
        success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
880
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
881
        //TODO cn lan_id needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
882
        success &= isNull("Region", destRs, id);  //region does not seem to exist in ERMS
883
        //TODO cn sources, see comments
884
//        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table, check with PESI 2014
885
//        success &= isNull("SourceNameCache", destRs);  //sources should be moved to extra table, check with PESI 2014
886
        success &= isNull("SpeciesExpertGUID", destRs, id);  //SpeciesExpertGUID does not exist in ERMS
887
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
888
        //complete
889
        return success;
890
    }
891

    
892
    private String normalizeLang(String string) {
893
        if ("Spanish".equals(string)){
894
            return "Spanish, Castillian";
895
        }else if ("Modern Greek (1453-)".equals(string)){
896
            return "Greek";
897
        }else if ("Malay (individual language)".equals(string)){
898
            return "Malay";
899
        }else if ("Swahili (individual language)".equals(string)){
900
            return "Swahili";
901
        }
902

    
903
        return string;
904
    }
905

    
906
    private String getLanguageIso(ResultSet destRs) throws SQLException {
907
        String result = destRs.getString(preferredISO639);
908
        if (result == null){
909
            result = destRs.getString(alternativeISO639);
910
        }
911
        return result;
912
    }
913

    
914
    private boolean testSingleReferences(int count) throws SQLException {
915
        boolean success = true;
916
        ResultSet srcRS = source.getResultSet("SELECT r.*, a.titleCache author "
917
                + " FROM Reference r LEFT OUTER JOIN AgentBase a ON r.authorship_id = a.id "
918
                + " ORDER BY r.id ");
919
        ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
920
                + " WHERE s." + origEuroMed
921
                + " ORDER BY s.RefIdInSource ");  // +1 for the source reference "erms" but this has no OriginalDB
922
        int i = 0;
923
        while (srcRS.next() && destRS.next()){
924
            success &= testSingleReference(srcRS, destRS);
925
            i++;
926
        }
927
        success &= equals("References count differs", count, i, "-1");
928
        return success;
929
    }
930

    
931
    private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
932
        String id = String.valueOf(srcRS.getInt("id"));
933
        boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
934
        success &= isNull("IMIS_Id", destRS, id);  //for E+M no IMIS id exists
935
        success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("refType")), destRS.getInt("SourceCategoryFk"), id);
936
        success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("refType")), destRS.getString("SourceCategoryCache"), id);
937
        success &= equals("Reference name ", srcRS.getString("titleCache"), destRS.getString("Name"), id);
938
        success &= equals("Reference abstract ", srcRS.getString("referenceAbstract"), destRS.getString("Abstract"), id);
939
        success &= equals("Reference title ", srcRS.getString("title"), destRS.getString("Title"), id);
940
        success &= equals("Reference author string ", srcRS.getString("author"), destRS.getString("AuthorString"), id);
941
        //TODO reference year
942
        success &= equals("Reference year ", normalizeYear(srcRS), destRS.getString("RefYear"), id);
943
        //FIXME reference nomrefcache
944
//        success &= equals("Reference NomRefCache ", srcRS.getString("abbrevTitleCache"), destRS.getString("NomRefCache"), id);
945
        success &= equals("Reference DOI ", srcRS.getString("doi"), destRS.getString("Doi"), id);
946
        success &= equals("Reference link ", srcRS.getString("uri"), destRS.getString("Link"), id);
947
        //TODO reference Notes
948
//        success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
949
        //complete
950
        return success;
951
    }
952

    
953
    private Integer convertSourceTypeFk(String sourceType) {
954
        if (sourceType == null){
955
            return null;
956
        }else if ("DB".equals(sourceType)){
957
            return PesiTransformer.REF_DATABASE;
958
        }else if ("JOU".equals(sourceType)){
959
            return PesiTransformer.REF_JOURNAL;
960
        }else if ("BK".equals(sourceType)){
961
            return PesiTransformer.REF_BOOK;
962
        }else if ("GEN".equals(sourceType)){
963
            return PesiTransformer.REF_UNRESOLVED;
964
        }else if ("SER".equals(sourceType)){
965
//            TODO correct?
966
            return PesiTransformer.REF_UNRESOLVED;
967
        }
968
        return null;
969
    }
970
    private String convertSourceTypeCache(String sourceType) {
971
        if (sourceType == null){
972
            return null;
973
        }else if ("DB".equals(sourceType)){
974
            return "database";
975
        }else if ("JOU".equals(sourceType)){
976
            return "journal";
977
        }else if ("BK".equals(sourceType)){
978
            return "book";
979
        }else if ("SER".equals(sourceType)){
980
            return "published";
981
        }else if ("BK".equals(sourceType)){
982
            return "book";
983
        }else if ("GEN".equals(sourceType)){
984
            return "unresolved";
985
        }
986
        return null;
987
    }
988

    
989
    private boolean testReferenceCount() {
990
        int countSrc = source.getUniqueInteger(countReferencesStr);
991
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origEuroMed);  // +1 for the source reference "erms" but this has no OriginalDB
992
        boolean success = equals("Reference count ", countSrc, countDest, "-1");
993
        return success;
994
    }
995

    
996
    private String normalizeYear(ResultSet rs) throws SQLException {
997
        String freetext = rs.getString("datePublished_freetext");
998
        if(StringUtils.isNotBlank(freetext)){
999
            return freetext;
1000
        }
1001
        String start = rs.getString("datePublished_start");
1002
        String end = rs.getString("datePublished_end");
1003
        if (start != null){
1004
            start = start.substring(0,4);
1005
        }
1006
        if (end != null){
1007
            end = end.substring(0,4);
1008
        }
1009
        String result = start == null? null: start + (end==null? "": "-"+ end);
1010
        return result;
1011
    }
1012

    
1013
    private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
1014
        if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
1015
            LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
1016
            LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
1017
            if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
1018
                logger.info(messageStart + " were (almost) equal: " + srcDate);
1019
                return true;
1020
            }else{
1021
                String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
1022
                logger.warn(message);
1023
                return false;
1024
            }
1025
        }else{
1026
            logger.info(messageStart + " were equal: " + srcDate);
1027
            return true;
1028
        }
1029
    }
1030

    
1031
    private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
1032
        String strId = id.equals("-1")? "": (id+ ": ");
1033
        if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
1034
            String message = strId+ messageStart + " must be equal, but was not.\n Source: "+  nSrc + "; Destination: " + nDest;
1035
            logger.warn(message);
1036
            return false;
1037
        }else{
1038
            logger.info(strId + messageStart + " were equal: " + nSrc);
1039
            return true;
1040
        }
1041
    }
1042

    
1043
//** ************* MAIN ********************************************/
1044

    
1045
    public static void main(String[] args){
1046
        PesiEuroMedValidator validator = new PesiEuroMedValidator();
1047
        validator.invoke(new Source(defaultSource), defaultDestination);
1048
        System.exit(0);
1049
    }
1050
}
(2-2/3)