Project

General

Profile

Download (52.4 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.pesi.euromed;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.sql.Timestamp;
14
import java.time.LocalDate;
15
import java.util.UUID;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.app.common.CdmDestinations;
21
import eu.etaxonomy.cdm.app.pesi.PesiDestinations;
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.database.ICdmDataSource;
24
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
25
import eu.etaxonomy.cdm.io.common.Source;
26
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
27
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
28
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
29

    
30
/**
31
 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
32
 *
33
 * @author a.mueller
34
 * @since 01.09.2019
35
 */
36
public class PesiEuroMedValidator {
37

    
38
    private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
39

    
40
    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
41
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
42
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
43

    
44
    boolean doReferences = false;
45
    boolean doTaxa = true;
46
    boolean doTaxRels = false;
47
    boolean doDistributions = false;
48
    boolean doCommonNames = false;
49
    boolean doNotes = false;
50
    boolean doAdditionalTaxonSources = false;
51

    
52
    private Source source = new Source(defaultSource);
53
    private Source destination = defaultDestination;
54

    
55
    private String origEuroMed = "OriginalDB = 'E+M' ";
56

    
57
    public void invoke(Source source, Source destination){
58
        logger.warn("Validate destination " +  destination.getDatabase());
59
        boolean success = true;
60
        try {
61
            this.source = source;
62
            this.destination = destination;
63
            success &= testReferences();
64
            success &= testTaxa();
65
            success &= testTaxonRelations();
66
            success &= testDistributions();
67
            success &= testCommonNames();
68
            success &= testNotes();
69
            success &= testAdditionalTaxonSources();
70
        } catch (Exception e) {
71
            e.printStackTrace();
72
            success = false;
73
        }
74
        //TBC
75
        System.out.println("end validation " + (success? "":"NOT ") + "successful.");
76
    }
77

    
78
    private boolean testAdditionalTaxonSources() throws SQLException {
79
        if (!doAdditionalTaxonSources){
80
            return true;
81
        }
82
        System.out.println("Start validate additional taxon sources");
83
        boolean success = testAdditionalTaxonSourcesCount();
84
        if (success){
85
              success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
86
        }
87
        return success;
88
    }
89

    
90
    private boolean testNotes() throws SQLException {
91
        if (!doNotes){
92
            return true;
93
        }
94
        System.out.println("Start validate notes");
95
        boolean success = testNotesCount();
96
        if (success){
97
              success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
98
        }
99
        return success;
100
    }
101

    
102
    private boolean testDistributions() throws SQLException {
103
        if (!doDistributions){
104
            return true;
105
        }
106
        System.out.println("Start validate distributions");
107
        boolean success = testDistributionCount();
108
        if (!success){
109
              success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
110
        }
111
        return success;
112
    }
113

    
114
    private boolean testCommonNames() throws SQLException {
115
        if (!doCommonNames){
116
            return true;
117
        }
118
        System.out.println("Start validate common names");
119
        boolean success = testCommonNameCount();
120
        if (success){
121
            success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
122
        }
123
        return success;
124
    }
125

    
126
    int countSynonyms;
127
    int countIncludedIns;
128
    private boolean testTaxonRelations() throws SQLException {
129
        if (!doTaxRels){
130
            return true;
131
        }
132
        System.out.println("Start validate taxon relations");
133
        boolean success = testSynonymRelations();
134
        success &= testIncludedInRelations();
135
        success &= testTotalRelations();
136
        success &= testNameRelations();
137
        return success;
138
    }
139

    
140
    private boolean testTotalRelations() {
141
        if (!(countSynonyms < 0 || countIncludedIns < 0)){
142
            int countTotalSrc = countSynonyms + countIncludedIns;
143
            int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
144
            boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
145
            int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origEuroMed);
146
            success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
147
            return success;
148
        }else{
149
            return false;
150
        }
151
    }
152

    
153
    private final String countSynonymRelation = "SELECT count(*) FROM TaxonBase syn LEFT JOIN TaxonBase acc ON syn.acceptedTaxon_id = acc.id WHERE syn.publish = 1 AND acc.publish = 1 ";
154
    private boolean testSynonymRelations() throws SQLException {
155

    
156
        int countSrc = source.getUniqueInteger(countSynonymRelation);
157
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
158
        boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
159
        if (success){
160
            //TODO test single synonym relations
161
            success &= testSingleSynonymRelations(source.getUniqueInteger(countSynonymRelation));
162
        }
163
        countSynonyms = (countSrc == countDest)? countSrc : -1;
164
        return success;
165
    }
166

    
167
    private boolean testSingleSynonymRelations(int n) throws SQLException {
168
        boolean success = true;
169
        ResultSet srcRS = source.getResultSet(""
170
                + " SELECT t.id tid, pt.id pid "
171
                + " FROM TaxonNode tn "
172
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
173
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
174
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
175
                + " WHERE t.publish = 1 && pt.publish = 1 "
176
                + " ORDER BY CAST(tb.id as char(20)) ");
177

    
178
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
179
                + " FROM RelTaxon rel "
180
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
181
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
182
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk > 101 "
183
                + " ORDER BY t1.IdInSource");
184
        int i = 0;
185
        while (srcRS.next() && destRS.next()){
186
            success &= testSingleSynonymRelation(srcRS, destRS);
187
            i++;
188
        }
189
        success &= equals("Synonym relation count for single compare", n, i, String.valueOf(-1));
190
        return success;
191
    }
192

    
193
    private boolean testSingleSynonymRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
194
        String id = String.valueOf(srcRS.getInt("id"));
195
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
196
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("tu_acctaxon"), destRS.getString("t2Id"), id);
197
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
198
        success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
199
        //TODO enable after next import
200
//        success &= isNull("notes", destRS);
201
        //complete if no further relations need to added
202
        return success;
203
    }
204

    
205
    private boolean testNameRelations() {
206
        //Name relations
207
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM NameRelationship WHERE ("
208
               + " 1=1 "
209
                 + ")");
210
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
211
        boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
212
        if (success){
213
            //TODO test single name relation
214
//            success &= testSingleNameRelations(source.getUniqueInteger(countSynonymRelation));
215
        }
216
        return success;
217
    }
218

    
219
    private final String countParentRelation  = "SELECT count(*) "
220
            + " FROM TaxonNode tn "
221
            + " INNER JOIN TaxonBase tb ON tn.taxon_id = tb.id "
222
            + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
223
            + "   LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
224
            + " WHERE tb.publish = 1 && pt.publish = 1  ";
225

    
226
    private boolean testIncludedInRelations() throws SQLException {
227
        int countSrc = source.getUniqueInteger(countParentRelation);
228
        int  countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
229
        boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
230
        if (success){
231
            success &= testSingleTaxonRelations(source.getUniqueInteger(countParentRelation));
232
        }
233
        countIncludedIns = (countSrc == countDest)? countSrc : -1;
234
        return success;
235
    }
236

    
237
    private boolean testTaxa() throws SQLException {
238
        if (!doTaxa){
239
            return true;
240
        }
241
        System.out.println("Start validate taxa");
242
        boolean success = testTaxaCount();
243
        if (success){
244
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
245
        }
246
        return success;
247
    }
248

    
249
    String countReferencesStr = "SELECT count(*) FROM reference ";
250
    private boolean testReferences() throws SQLException {
251
        if (!doReferences){
252
            return true;
253
        }
254
        System.out.println("Start validate references");
255
        boolean success = testReferenceCount();
256
        if (success){
257
            success &= testSingleReferences(source.getUniqueInteger(countReferencesStr));
258
        }
259
        return success;
260
    }
261

    
262
    private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts ";
263
    private boolean testAdditionalTaxonSourcesCount() {
264
        int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
265
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
266
        return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
267
    }
268

    
269
    private boolean testNotesCount() {
270
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
271
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
272
                + " WHERE (1=1) ");
273
        boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
274

    
275
        return result;
276
    }
277

    
278
    private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
279
            + "'111bdf38-7a32-440a-9808-8af1c9e54b51',"   //E+M
280
            //Former UUSR
281
            + "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
282
            + "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
283
            + "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
284
    private String distributionCountSQL = "SELECT count(*) as n "
285
            + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
286
            + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
287
            + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
288
            + distributionCountWhere;
289
    private boolean testDistributionCount() {
290
        int countSrc = source.getUniqueInteger(distributionCountSQL);
291
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
292
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
293
    }
294

    
295
    private boolean testCommonNameCount() {
296
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
297
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
298
        return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
299
    }
300

    
301
    private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
302
    private boolean testTaxaCount() {
303
         int countSrc = source.getUniqueInteger(countTaxon);
304
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t.SourceFk IS NOT NULL OR t.AuthorString = 'auct.' ");
305
         boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
306
         return result;
307
     }
308

    
309
    private boolean testSingleTaxa(int n) throws SQLException {
310
        boolean success = true;
311
        ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, pt.id parentId, "
312
                + "      tn.rank_id, rank.titleCache rank_name, "
313
                + "      sec.titleCache secTitle,"
314
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
315
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, tn.fullTitleCache nameFullTitleCache, "
316
                + "      tb.DTYPE taxStatus, taxRelType.uuid taxRelTypeUuid, nsType.id nsId, nsType.idInVocabulary nsTitle, "
317
                + "      typeName_id, typeName.titleCache typeFullNameCache, "
318
                + "      CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
319
                + "      CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
320
                + " FROM TaxonBase tb "
321
                + "     LEFT JOIN TaxonName tn on tb.name_id = tn.id "
322
                + "     LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
323
                + "     LEFT JOIN Reference sec ON sec.id = tb.sec_id "
324
                + "     LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
325
                + "     LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
326
                + "     LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
327
                + "     LEFT JOIN TaxonName_TypeDesignationBase typeMN ON typeMN.TaxonName_id = tn.id "
328
                + "     LEFT JOIN TypeDesignationBase td ON td.id = typeMN.typedesignations_id "
329
                + "     LEFT JOIN TaxonName typeName ON typeName.id = td.typeName_id "
330
                + "     LEFT JOIN TaxonNode n ON n.taxon_id = tb.id "
331
                + "     LEFT JOIN TaxonNode ptn ON n.parent_id = ptn.id "
332
                + "     LEFT JOIN TaxonBase pt ON pt.id = ptn.taxon_id AND pt.publish = 1 "
333
                + "     LEFT JOIN TaxonRelationship tr ON tr.relatedFrom_id = tb.id "
334
                + "     LEFT JOIN DefinedTermBase taxRelType ON taxRelType.id = tr.type_id"
335
                + " WHERE tb.publish = 1 "
336
                + " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
337
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
338
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache, "
339
                + "      tb.DTYPE, tb.updated, tb.created "    //for duplicates caused by >1 name status
340
                + " ORDER BY tid, GUID, lastActionDate ");
341
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
342
                + "     pt.IdInSource parentSourceId, "  //not needed
343
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
344
                + " FROM Taxon t "
345
                + "    LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
346
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
347
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
348
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
349
                + " WHERE t."+ origEuroMed + " AND (t.SourceFk IS NOT NULL  OR t.AuthorString = 'auct.') "   //FIXME remove SourceFk filter is only preliminary for first check
350
                + " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
351
        int i = 0;
352
        logger.error("remove SourceFk filter is only preliminary for first check");
353
        while (srcRS.next() && destRS.next()){
354
            success &= testSingleTaxon(srcRS, destRS);
355
            i++;
356
        }
357
        success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
358
        return success;
359
    }
360

    
361
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
362
        String id = String.valueOf(srcRS.getInt("tid"));
363
        //TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
364
        boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
365
        success &= equals("Taxon source", srcRS.getString("secTitle"), destRS.getString("sourceName"), id);
366

    
367
        success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
368
//difficult to test        success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
369
        success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
370
        success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
371
        success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
372
        success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
373
        success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
374

    
375
        success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
376
//TODO        success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
377
//FIXME sensu+auct. autoren       success &= equals("Taxon authority", srcRS.getString("authorshipCache"), destRS.getString("AuthorString"), id);
378
//FIXME sensu+auct. autoren        success &= equals("Taxon FullName", srcRS.getString("nameTitleCache"), destRS.getString("FullName"), id);
379
        success &= equals("Taxon NomRefString", makeNomRefString(srcRS), destRS.getString("NomRefString"), id);
380
        success &= equals("Taxon DisplayName", makeDisplayName(srcRS), destRS.getString("DisplayName"), id);  //in ERMS according to SQL script same as FullName, no nom.ref. information attached
381
//difficult to test   success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
382
        success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
383

    
384
//reimport        success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), nullSafeInt( destRS,"TaxonStatusFk"), id);
385
//reimport        success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), destRS.getString("TaxonStatusCache"), id);
386

    
387
        success &= equals("Taxon ParentTaxonFk", nullSafeInt(srcRS, "parentId"), nullSafeInt(destRS, "ParentTaxonFk"), id);
388

    
389
        Integer origTypeNameFk = nullSafeInt(srcRS, "typeName_id");
390
        success &= equals("Taxon TypeNameFk", origTypeNameFk == null? null : "NameId: " + origTypeNameFk, destRS.getString("typeSourceId"), id);
391
        success &= equals("Taxon TypeFullNameCache", srcRS.getString("typeFullNameCache"), destRS.getString("TypeFullNameCache"), id);
392
        //according to SQL always constant, could be changed in future
393
        success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
394
        success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
395
//TODO TreeIndex
396
        success &= isNull("FossilStatusFk", destRS);
397
        success &= isNull("FossilStatusCache", destRS);
398
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
399
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
400
        success &= isNull("ExpertGUID", destRS);  //according to SQL + PESI2014
401
//reimport        success &= equals("Taxon ExpertName", srcRS.getString("secTitle"), destRS.getString("ExpertName"), id);
402
        success &= isNull("SpeciesExpertGUID", destRS);
403
//reimport        success &= equals("Taxon SpeciesExpertName", srcRS.getString("secTitle"), destRS.getString("SpeciesExpertName"), id);
404
//FIXME !!        success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
405
        success &= equals("Taxon Last Action", srcRS.getString("lastAction"),  destRS.getString("LastAction"), id);
406
        success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"),  destRS.getTimestamp("LastActionDate"), id);
407

    
408
        success &= isNull("GUID2", destRS);  //only relevant after merge
409
        success &= isNull("DerivedFromGuid2", destRS);  //only relevant after merge
410
        return success;
411
    }
412

    
413
    private String makeNomRefString(ResultSet srcRS) throws SQLException {
414
        //there is no pure nomRefString field in CDM and also computing is only possible
415
        //with cache strategy which requires a running CDM instance. So this is a workaround that maybe needs to be adapted
416
        String result = null;
417
        String fullTitle = srcRS.getString("nameFullTitleCache");
418
        String nameTitleCache = srcRS.getString("nameTitleCache");
419
        String nameStatus = CdmUtils.Nz(srcRS.getString("nsTitle"));
420
        if (fullTitle != null && nameTitleCache != null){
421
            result = fullTitle.substring(nameTitleCache.length())
422
                    .replaceAll("^, ", "")
423
                    .replaceAll("(, |^)"+nameStatus+"$", "");
424
        }
425
        return result;
426
    }
427

    
428
    private String mapTaxStatus(String dtype, String taxRelTypeUuidStr) {
429
        Integer statusFk = mapTaxStatusFk(dtype, taxRelTypeUuidStr);
430
        if (statusFk == null){
431
            return null;
432
        }else if (statusFk == PesiTransformer.T_STATUS_ACCEPTED){
433
            return "accepted";
434
        }else if (statusFk == PesiTransformer.T_STATUS_SYNONYM){
435
            return "synonym";
436
        }else if (statusFk == PesiTransformer.T_STATUS_PRO_PARTE_SYN){
437
            return "pro parte synonym";
438
        }else if (statusFk == PesiTransformer.T_STATUS_PARTIAL_SYN){
439
            return "partial synonym";
440
        }
441
        return null;
442
    }
443

    
444
    private Integer mapTaxStatusFk(String dtype, String taxRelTypeUuidStr) {
445
        if (dtype == null){
446
            return null;
447
        }else if ("Synonym".equals(dtype)){
448
            return PesiTransformer.T_STATUS_SYNONYM;
449
        }else if ("Taxon".equals(dtype)){
450
            UUID relTypeUuid = taxRelTypeUuidStr == null? null: UUID.fromString(taxRelTypeUuidStr);
451
            if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
452
                return PesiTransformer.T_STATUS_PRO_PARTE_SYN;
453
            }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
454
                return PesiTransformer.T_STATUS_PARTIAL_SYN;
455
            }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
456
                return PesiTransformer.T_STATUS_SYNONYM;  //no explicit MAN status exists in PESI
457
            }else{
458
                return PesiTransformer.T_STATUS_ACCEPTED;
459
            }
460
        }
461
        return null;
462
    }
463

    
464
    private String normalizeRank(String rankStr) {
465
        if (rankStr == null){return null;
466
        }else if (rankStr.equals("Convar")){return "Convariety";
467
        }else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
468
        }else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
469
        }else if (rankStr.equals("Coll. species")){return "Coll. Species";
470
        }else if (rankStr.equals("Species Aggregate")){return "Aggregate";
471
        }else if (rankStr.equals("Subsection bot.")){return "Subsection";
472
        }return rankStr;
473
    }
474

    
475
    private String makeDisplayName(ResultSet srcRs) throws SQLException {
476
        String nameCache = srcRs.getString("nameCache");
477
        String nameTitle = srcRs.getString("nameTitleCache");
478
        String taggedName = getTaggedNameTitle(nameCache, nameTitle);
479
        String fullNameTitle = srcRs.getString("nameFullTitleCache");
480
        String result = fullNameTitle
481
                .replace(nameTitle, taggedName);
482
        return result;
483
    }
484

    
485
    private String getTaggedNameTitle(String nameCache, String nameTitle) {
486
        if (nameCache == null){
487
            logger.warn("NameCache is null");
488
            return nameTitle;
489
        }
490
        String result = null;
491
        try {
492
            String[] nameCacheSplit = nameCache.split(" ");
493
            String[] nameTitleSplit = nameTitle.split(" ");
494
            result = "";
495
            boolean currentIsName = false;
496
            for (int i=0, j=0; j < nameTitleSplit.length; j++){
497
                if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
498
                        && !isMarker(nameCacheSplit[i])){
499
                    if(!currentIsName){
500
                        result += " <i>" + nameCacheSplit[i];
501
                        currentIsName = true;
502
                    }else{
503
                        result += " " + nameCacheSplit[i];
504
                    }
505
                    if((j+1)==nameTitleSplit.length){
506
                        result += "</i>";
507
                    }
508
                    i++;
509
                }else{
510
                    if(currentIsName){
511
                        result += "</i>";
512
                        currentIsName = false;
513
                    }
514
                    result += " " + nameTitleSplit[j];
515
                    if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
516
                            && isMarker(nameCacheSplit[i])){
517
                        i++;
518
                    }
519
                }
520
            }
521
            return result.trim();
522
        } catch (Exception e) {
523
            e.printStackTrace();
524
            return result;
525
        }
526
    }
527

    
528
    private boolean isMarker(String nameCacheSplit) {
529
        return nameCacheSplit.endsWith(".") || nameCacheSplit.equals("[unranked]") ;
530
    }
531

    
532
    private boolean testSingleTaxonRelations(int n) throws SQLException {
533
        boolean success = true;
534
        ResultSet srcRS = source.getResultSet(""
535
                + " SELECT t.name_id tid, pt.name_id pid "
536
                + " FROM TaxonNode tn "
537
                + "   INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
538
                + "   LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
539
                + "   LEFT JOIN TaxonBase  pt ON ptn.taxon_id = pt.id "
540
                + " WHERE t.publish = 1 && pt.publish = 1 "
541
                + " ORDER BY CAST(t.name_id as char(20)) ");
542

    
543
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
544
                + " FROM RelTaxon rel "
545
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
546
                + "    LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
547
                + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk = 101 "
548
                + " ORDER BY t1.IdInSource");
549
        int i = 0;
550
        while (srcRS.next() && destRS.next()){
551
            success &= testSingleTaxonRelation(srcRS, destRS);
552
            i++;
553
        }
554
        success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
555
        return success;
556
    }
557

    
558
    private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
559
        String id = String.valueOf(srcRS.getInt("tid"));
560
        boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("tid"), destRS.getString("t1Id"), id);
561
        success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("pid"), destRS.getString("t2Id"), id);
562
        success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_TAXONOMICALLY_INCLUDED_IN, destRS.getInt("RelTaxonQualifierFk"), id);
563
        success &= equals("Taxon relation qualifier cache", "is taxonomically included in", destRS.getString("RelQualifierCache"), id);
564
        //TODO enable after next import
565
        success &= isNull("notes", destRS);
566
        //complete if no further relations need to added
567
        return success;
568
    }
569

    
570
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
571
        boolean success = true;
572
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
573
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
574
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
575
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
576
                + " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
577
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
578
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
579
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
580
                + "    LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
581
                + " WHERE t."+origEuroMed
582
                + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
583
        int count = 0;
584
        while (srcRs.next() && destRs.next()){
585
            success &= testSingleAdditionalTaxonSource(srcRs, destRs);
586
            count++;
587
        }
588
        success &= equals("Notes count differs", n, count, "-1");
589
        return success;
590
    }
591

    
592
    private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
593
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
594
        boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
595
        success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id);  //currently we use the same id in ERMS and PESI
596
        success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
597
        success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
598
        //TODO some records are still truncated ~ >820 characters
599
        success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
600
        success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
601
        //Complete
602
        return success;
603
    }
604

    
605
    private boolean testSingleNotes(int n) throws SQLException {
606
        boolean success = true;
607
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
608
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
609
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
610
                + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
611
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
612
                + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
613
                + "    LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
614
                + "    LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
615
                + " WHERE t." + origEuroMed
616
                + "      AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
617
                + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1  ");
618
        int count = 0;
619
        while (srcRs.next() && destRs.next()){
620
            success &= testSingleNote(srcRs, destRs);
621
            count++;
622
        }
623
        success &= equals("Notes count differs", n, count, "-1");
624
        return success;
625
    }
626

    
627
    private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
628
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
629
        boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
630
        success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
631
        success &= isNull("Note_2", destRs);
632
        success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
633
        success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
634
        success &= isNull("Region", destRs);
635
        success &= isNull("SpeciesExpertGUID", destRs);
636
        //SpeciesExpertName, LastAction, LastActionDate handled in separate method
637
        //complete
638
        return success;
639
    }
640

    
641
    private String normalizeNoteCatCache(String string) {
642
        return StringUtils.capitalize(string)
643
                .replace("Original Combination", "Original combination")
644
                .replace("Taxonomic remark", "Taxonomic Remark");
645
    }
646

    
647
    private boolean testSingleDistributions(int n) throws SQLException {
648
        boolean success = true;
649
        ResultSet srcRs = source.getResultSet(
650
                  " SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
651
                + "        CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
652
                + "        CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
653
                + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
654
                + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
655
                + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
656
                + "    LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
657
                + distributionCountWhere
658
                + " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
659
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
660
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
661
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
662
                + " WHERE t." + origEuroMed
663
                + " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
664
        int count = 0;
665
        while (srcRs.next() && destRs.next()){
666
            success &= testSingleDistribution(srcRs, destRs);
667
            count++;
668
        }
669
        success &= equals("Distribution count differs", n, count, "-1");
670
        return success;
671
    }
672

    
673
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
674
        String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
675
        boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
676
        success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
677
//        success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
678
        success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
679
        success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
680
//TODO        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
681
        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
682
        success &= isNull("SourceCache", destRs);  //sources should be moved to extra table, see above
683
//TODO        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
684
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in EM and according to script
685
        success &= isNull("SpeciesExpertName", destRs);  //SpeciesExpertName does not exist in EM and according to script
686
        success &= equals("Distribution Last Action", srcRs.getString("lastAction"),  destRs.getString("LastAction"), id);
687
        success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"),  destRs.getTimestamp("LastActionDate"), id);
688
        return success;
689
    }
690

    
691
    /**
692
     * @param string
693
     * @return
694
     */
695
    private Integer mapStatus(String uuidStr) {
696
        UUID uuid = UUID.fromString(uuidStr);
697
        if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){  //native, reported in error
698
            return PesiTransformer.STATUS_ABSENT;
699
        }else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducesAdventitious)  //casual, introduced adventitious
700
                || uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
701
                || uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
702
            return PesiTransformer.STATUS_INTRODUCED;
703
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){  //native
704
            return PesiTransformer.STATUS_NATIVE;
705
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){  //naturalised
706
            return PesiTransformer.STATUS_NATURALISED;
707
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){  //native, presence questionable
708
            return PesiTransformer.STATUS_DOUBTFUL;
709
        }else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){  //cultivated
710
            return PesiTransformer.STATUS_MANAGED;
711
        }else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){  //native, reported in error
712
            return -1;
713
        }
714

    
715
        return null;
716
    }
717

    
718
    private String normalizeDistrArea(String area) {
719
        if (area == null){
720
            return null;
721
        }else if ("France".equals(area)){return "French mainland";
722
        }else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
723
        }else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
724
        }else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
725
        }else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
726
        }else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
727
        }else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
728
        }else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
729
        }else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
730
        }else if ("Canary Is.".equals(area)){return "Canary Islands";
731
        }else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
732
        }else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
733
        }else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
734
        }else if ("Portugal".equals(area)){return "Portuguese mainland";
735
        }else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
736
        }else if ("Norway".equals(area)){return "Norwegian mainland";
737
        }else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
738
        }else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
739
        }else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
740
        }else if ("Ireland".equals(area)){return "Republic of Ireland";
741
        }else if ("France".equals(area)){return "French mainland";
742
        }
743
        return area;
744
    }
745

    
746
    private boolean testSingleCommonNames(int n) throws SQLException {
747
        boolean success = true;
748
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
749
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
750
                + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
751
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
752
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
753
                + " WHERE t." + origEuroMed
754
                + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate ");  //sorting also lastActionDate results in a minimum of exact duplicate problems
755
        int count = 0;
756
        while (srcRs.next() && destRs.next()){
757
            success &= testSingleCommonName(srcRs, destRs);
758
            count++;
759
        }
760
        success &= equals("Common name count differs", n, count, "-1");
761
        return success;
762
    }
763

    
764
    boolean prefer639_3 = true;
765
    String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
766
    String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
767

    
768
    private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
769
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
770
        boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
771
        success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
772
        success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
773
        success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
774
        //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
775
        success &= isNull("Region", destRs);  //region does not seem to exist in ERMS
776
        //TODO see comments
777
//        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table, check with PESI 2014
778
//        success &= isNull("SourceNameCache", destRs);  //sources should be moved to extra table, check with PESI 2014
779
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in ERMS
780
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
781
        //complete
782
        return success;
783
    }
784

    
785
    private String normalizeLang(String string) {
786
        if ("Spanish".equals(string)){
787
            return "Spanish, Castillian";
788
        }else if ("Modern Greek (1453-)".equals(string)){
789
            return "Greek";
790
        }else if ("Malay (individual language)".equals(string)){
791
            return "Malay";
792
        }else if ("Swahili (individual language)".equals(string)){
793
            return "Swahili";
794
        }
795

    
796
        return string;
797
    }
798

    
799
    private String getLanguageIso(ResultSet destRs) throws SQLException {
800
        String result = destRs.getString(preferredISO639);
801
        if (result == null){
802
            result = destRs.getString(alternativeISO639);
803
        }
804
        return result;
805
    }
806

    
807
    private boolean testSingleReferences(int count) throws SQLException {
808
        boolean success = true;
809
        ResultSet srcRS = source.getResultSet("SELECT r.*, a.titleCache author "
810
                + " FROM Reference r LEFT OUTER JOIN AgentBase a ON r.authorship_id = a.id "
811
                + " ORDER BY r.id ");
812
        ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
813
                + " WHERE s." + origEuroMed
814
                + " ORDER BY s.RefIdInSource ");  // +1 for the source reference "erms" but this has no OriginalDB
815
        int i = 0;
816
        while (srcRS.next() && destRS.next()){
817
            success &= testSingleReference(srcRS, destRS);
818
            i++;
819
        }
820
        success &= equals("References count differs", count, i, "-1");
821
        return success;
822
    }
823

    
824
    private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
825
        String id = String.valueOf(srcRS.getInt("id"));
826
        boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
827
        success &= isNull("IMIS_Id", destRS);  //for E+M no IMIS id exists
828
        success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("refType")), destRS.getInt("SourceCategoryFk"), id);
829
        success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("refType")), destRS.getString("SourceCategoryCache"), id);
830
        success &= equals("Reference name ", srcRS.getString("titleCache"), destRS.getString("Name"), id);
831
        success &= equals("Reference abstract ", srcRS.getString("referenceAbstract"), destRS.getString("Abstract"), id);
832
        success &= equals("Reference title ", srcRS.getString("title"), destRS.getString("Title"), id);
833
        success &= equals("Reference author string ", srcRS.getString("author"), destRS.getString("AuthorString"), id);
834
        //TODO
835
        success &= equals("Reference year ", normalizeYear(srcRS), destRS.getString("RefYear"), id);
836
        //FIXME
837
//        success &= equals("Reference NomRefCache ", srcRS.getString("abbrevTitleCache"), destRS.getString("NomRefCache"), id);
838
        success &= equals("Reference DOI ", srcRS.getString("doi"), destRS.getString("Doi"), id);
839
        success &= equals("Reference link ", srcRS.getString("uri"), destRS.getString("Link"), id);
840
        //TODO Notes
841
//        success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
842
        //complete
843
        return success;
844
    }
845

    
846
    private Integer convertSourceTypeFk(String sourceType) {
847
        if (sourceType == null){
848
            return null;
849
        }else if ("DB".equals(sourceType)){
850
            return PesiTransformer.REF_DATABASE;
851
        }else if ("JOU".equals(sourceType)){
852
            return PesiTransformer.REF_JOURNAL;
853
        }else if ("BK".equals(sourceType)){
854
            return PesiTransformer.REF_BOOK;
855
        }else if ("GEN".equals(sourceType)){
856
            return PesiTransformer.REF_UNRESOLVED;
857
        }else if ("SER".equals(sourceType)){
858
//            TODO correct?
859
            return PesiTransformer.REF_UNRESOLVED;
860
        }else if ("i".equals(sourceType)){
861
            return 12;
862
        }
863
        return null;
864
    }
865
    private String convertSourceTypeCache(String sourceType) {
866
        if (sourceType == null){
867
            return null;
868
        }else if ("DB".equals(sourceType)){
869
            return "database";
870
        }else if ("JOU".equals(sourceType)){
871
            return "journal";
872
        }else if ("BK".equals(sourceType)){
873
            return "book";
874
        }else if ("SER".equals(sourceType)){
875
            return "published";
876
        }else if ("BK".equals(sourceType)){
877
            return "book";
878
        }else if ("GEN".equals(sourceType)){
879
            return "unresolved";
880
        }else if ("i".equals(sourceType)){
881
            //TODO
882
            return "i";
883
        }
884
        return null;
885
    }
886

    
887
    private boolean testReferenceCount() {
888
        int countSrc = source.getUniqueInteger(countReferencesStr);
889
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origEuroMed);  // +1 for the source reference "erms" but this has no OriginalDB
890
        boolean success = equals("Reference count ", countSrc, countDest, "-1");
891
        return success;
892
    }
893

    
894
    private String normalizeYear(ResultSet rs) throws SQLException {
895
        String freetext = rs.getString("datePublished_freetext");
896
        if(StringUtils.isNotBlank(freetext)){
897
            return freetext;
898
        }
899
        String start = rs.getString("datePublished_start");
900
        String end = rs.getString("datePublished_end");
901
        if (start != null){
902
            start = start.substring(0,4);
903
        }
904
        if (end != null){
905
            end = end.substring(0,4);
906
        }
907
        String result = start == null? null: start + (end==null? "": "-"+ end);
908
        return result;
909
    }
910

    
911
    private boolean isNull(String attrName, ResultSet destRS) throws SQLException {
912
        Object value = destRS.getObject(attrName);
913
        if (value != null){
914
            String message = attrName + " was expected to be null but was: " + value.toString();
915
            logger.warn(message);
916
            return false;
917
        }else{
918
            logger.info(attrName + " was null as expected");
919
            return true;
920
        }
921
    }
922

    
923
    private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
924
        if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
925
            LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
926
            LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
927
            if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
928
                logger.info(messageStart + " were (almost) equal: " + srcDate);
929
                return true;
930
            }else{
931
                String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
932
                logger.warn(message);
933
                return false;
934
            }
935
        }else{
936
            logger.info(messageStart + " were equal: " + srcDate);
937
            return true;
938
        }
939
    }
940

    
941
    private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
942
        String strId = id.equals("-1")? "": (id+ ": ");
943
        if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
944
            String message = strId+ messageStart + " must be equal, but was not.\n Source: "+  nSrc + "; Destination: " + nDest;
945
            logger.warn(message);
946
            return false;
947
        }else{
948
            logger.info(strId + messageStart + " were equal: " + nSrc);
949
            return true;
950
        }
951
    }
952

    
953
    private boolean equals(String messageStart, String strSrc, String strDest, String id) {
954
        if (StringUtils.isBlank(strSrc)){
955
            strSrc = null;
956
        }else{
957
            strSrc = strSrc.trim();
958
        }
959
        //we do not trim strDest here because this should be done during import already. If not it should be shown here
960
        if (!CdmUtils.nullSafeEqual(strSrc, strDest)){
961
            int index = CdmUtils.diffIndex(strSrc, strDest);
962
            String message = id+ ": " + messageStart + " must be equal, but was not at "+index+".\n  Source:      "+  strSrc + "\n  Destination: " + strDest;
963
            logger.warn(message);
964
            return false;
965
        }else{
966
            logger.info(id+ ": " + messageStart + " were equal: " + strSrc);
967
            return true;
968
        }
969
    }
970

    
971
    protected Integer nullSafeInt(ResultSet rs, String columnName) throws SQLException {
972
        Object intObject = rs.getObject(columnName);
973
        if (intObject == null){
974
            return null;
975
        }else{
976
            return Integer.valueOf(intObject.toString());
977
        }
978
    }
979

    
980
//** ************* MAIN ********************************************/
981

    
982

    
983

    
984
    public static void main(String[] args){
985
        PesiEuroMedValidator validator = new PesiEuroMedValidator();
986
        validator.invoke(new Source(defaultSource), defaultDestination);
987
        System.exit(0);
988
    }
989
}
(4-4/4)