Project

General

Profile

« Previous | Next » 

Revision b639d1f2

Added by Andreas Müller over 4 years ago

ref #8577 updates to the E+M PESI export validation

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/euromed/PesiEuroMedValidator.java
11 11
import java.sql.ResultSet;
12 12
import java.sql.SQLException;
13 13
import java.sql.Timestamp;
14
import java.time.LocalDate;
15
import java.util.UUID;
14 16

  
15 17
import org.apache.commons.lang.StringUtils;
16 18
import org.apache.log4j.Logger;
......
19 21
import eu.etaxonomy.cdm.app.pesi.PesiDestinations;
20 22
import eu.etaxonomy.cdm.common.CdmUtils;
21 23
import eu.etaxonomy.cdm.database.ICdmDataSource;
24
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
22 25
import eu.etaxonomy.cdm.io.common.Source;
23 26
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
27
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
24 28

  
25 29
/**
26 30
 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
......
33 37
    private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
34 38

  
35 39
    private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
36
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
37
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
40
//    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
41
    private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
38 42

  
39 43
    private Source source = new Source(defaultSource);
40 44
    private Source destination = defaultDestination;
......
47 51
        try {
48 52
            this.source = source;
49 53
            this.destination = destination;
50
            success &= testReferences();
51
//            success &= testTaxa();
54
//            success &= testReferences();
55
              success &= testTaxa();
52 56
//            success &= testTaxonRelations();
53
//            success &= testDistributions();
57
//              success &= testDistributions();
54 58
//            success &= testNotes();
55 59
//            success &= testAdditionalTaxonSources();
56 60
        } catch (Exception e) {
......
82 86
    private boolean testDistributions() throws SQLException {
83 87
        System.out.println("Start validate distributions");
84 88
        boolean success = testDistributionCount();
85
        if (success){
86
              success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
89
        if (!success){
90
              success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
87 91
        }
88 92
        return success;
89 93
    }
......
179 183
        System.out.println("Start validate taxa");
180 184
        boolean success = testTaxaCount();
181 185
        //FIXME
182
        if (!success){
186
        if (success){
183 187
            success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
184 188
        }
185 189
        return success;
......
221 225
        return result;
222 226
    }
223 227

  
228
    private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
229
            + "'111bdf38-7a32-440a-9808-8af1c9e54b51',"   //E+M
230
            //Former UUSR
231
            + "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
232
            + "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
233
            + "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
234
    private String distributionCountSQL = "SELECT count(*) as n "
235
            + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
236
            + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
237
            + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
238
            + distributionCountWhere;
224 239
    private boolean testDistributionCount() {
225
        int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr ");
240
        int countSrc = source.getUniqueInteger(distributionCountSQL);
226 241
        int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
227 242
        return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
228 243
    }
......
236 251
    private final String countSynonymRelation = "SELECT count(*) FROM tu syn LEFT JOIN tu acc ON syn.tu_acctaxon = acc.id WHERE (syn.id <> acc.id AND syn.tu_acctaxon IS NOT NULL AND syn.id <> acc.tu_parent) ";
237 252
    private final String countParentRelation  = "SELECT count(*)-1 FROM tu syn LEFT JOIN tu acc ON syn.tu_acctaxon = acc.id WHERE (syn.id =  acc.id OR  syn.tu_acctaxon IS     NULL OR  syn.id =  acc.tu_parent) ";
238 253

  
239
    private final String countTaxon = "SELECT count(*) FROM tu ";
254
    private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
240 255
    private boolean testTaxaCount() {
241 256
         int countSrc = source.getUniqueInteger(countTaxon);
242
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
257
         int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t.SourceFk IS NOT NULL OR t.AuthorString = 'auct.' ");
243 258
         boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
244 259

  
245
         //NomStatus
246
         countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE ("
247
               + " tu_unacceptreason like '%inval%' OR  tu_unacceptreason like '%not val%' "
248
               + " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
249
               + " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
250
               + " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
251
               + " OR tu_unacceptreason = 'new combination'  "
252
               + " OR tu_status IN (3,5,6,7,8) )");
253
         countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
254
         result = equals("Taxon name status count ", countSrc, countDest, String.valueOf(-1));
260
//         //NomStatus
261
//         countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE ("
262
//               + " tu_unacceptreason like '%inval%' OR  tu_unacceptreason like '%not val%' "
263
//               + " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
264
//               + " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
265
//               + " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
266
//               + " OR tu_unacceptreason = 'new combination'  "
267
//               + " OR tu_status IN (3,5,6,7,8) )");
268
//         countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
269
//         result = equals("Taxon name status count ", countSrc, countDest, String.valueOf(-1));
255 270

  
256 271
         return result;
257 272
     }
258 273

  
259 274
    private boolean testSingleTaxa(int n) throws SQLException {
260 275
        boolean success = true;
261
        ResultSet srcRS = source.getResultSet(""
262
                + " SELECT t.*, tu1.tu_name tu1_name, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
263
                + "        type.tu_displayname typename, type.tu_authority typeauthor, "
264
                + "        fo.fossil_name, qs.qualitystatus_name "
265
                + " FROM tu t "
266
                + " LEFT JOIN tu as tu1 on t.tu_parent = tu1.id"
267
                + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 40 AND rank_name = 'Subdivision' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
268
                + " LEFT JOIN tu acc ON acc.id = t.tu_acctaxon "
269
                + " LEFT JOIN status st ON st.status_id = t.tu_status "
270
                + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
271
                + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
272
                + " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
273
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
276
        ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, tn.rank_id, rank.titleCache rank_name, "
277
                + "      sec.titleCache secTitle,"
278
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
279
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, "
280
                + "      tb.DTYPE taxStatus, nsType.id nsId, nsType.idInVocabulary nsTitle, "
281
                + "      CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
282
                + "      CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
283
                + " FROM TaxonBase tb "
284
                + "     LEFT JOIN TaxonName tn on tb.name_id = tn.id "
285
                + "     LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
286
                + "     LEFT JOIN Reference sec ON sec.id = tb.sec_id "
287
                + "     LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
288
                + "     LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
289
                + "     LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
290
                + " WHERE tb.publish = 1 "
291
                + " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
292
                + "      tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
293
                + "      tn.nameCache, tn.authorshipCache, tn.titleCache, "
294
                + "      tb.DTYPE, tb.updated, tb.created "    //for duplicates caused by >1 name status
295
                + " ORDER BY tid, GUID, lastActionDate ");
274 296
        ResultSet destRS = destination.getResultSet("SELECT t.*, "
275 297
                + "     pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
276 298
                + "     s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
......
279 301
                + "    LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
280 302
                + "    LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
281 303
                + "    LEFT JOIN Source s ON s.SourceId = t.SourceFk "
282
                + " WHERE t."+ origEuroMed
283
                + " ORDER BY t.IdInSource");
284
        ResultSet srcRsLastAction = source.getResultSet(""
285
                + " SELECT t.id, s.sessiondate, a.action_name, s.ExpertName "
286
                + " FROM tu t "
287
                + "   LEFT OUTER JOIN tu_sessions MN ON t.id = MN.tu_id "
288
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
289
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
290
                + " ORDER BY CAST(t.id as nvarchar(20)), s.sessiondate DESC, a.id DESC ");
304
                + " WHERE t."+ origEuroMed + " AND (t.SourceFk IS NOT NULL  OR t.AuthorString = 'auct.') "   //FIXME remove SourceFk filter is only preliminary for first check
305
                + " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
291 306
        int i = 0;
307
        logger.error("remove SourceFk filter is only preliminary for first check");
292 308
        while (srcRS.next() && destRS.next()){
293 309
            success &= testSingleTaxon(srcRS, destRS);
294
//TODO       success &= testLastAction(srcRsLastAction, destRS, String.valueOf(srcRS.getInt("id")), "Taxon");
295 310
            i++;
296 311
        }
297 312
        success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
......
300 315

  
301 316

  
302 317
    private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
303
        String id = String.valueOf(srcRS.getInt("id"));
304
        boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id);
305
        success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
306
//TODO some
307
        success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
308
        success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id);
318
        String id = String.valueOf(srcRS.getInt("tid"));
319
        //TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
320
        boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
321
        success &= equals("Taxon source", srcRS.getString("secTitle"), destRS.getString("sourceName"), id);
322

  
323
        success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
324
//difficult to test        success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
309 325
        success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name"), srcRS, id), destRS.getString("Rank"), id);
310
        success &= compareNameParts(srcRS, destRS, id);
311

  
312
        success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
313
//        success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
314
        success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
315
//        success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
316
        success &= isNull("NomRefString", destRS);
317
//        success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id);  //according to SQL script same as FullName, no nom.ref. information attached
318

  
319
//TODO        success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
320
//TODO        success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
321

  
322
//TODO        success &= equals("Taxon TaxonStatusFk", nullSafeInt(srcRS, "tu_status"),nullSafeInt( destRS,"TaxonStatusFk"), id);
323
//TODO        success &= equals("Taxon TaxonStatusCache", srcRS.getString("status_name"), destRS.getString("TaxonStatusCache"), id);
324

  
325
        //TODO ParentTaxonFk
326
        Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
327
        success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
328
//TODO  success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
329
        success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
330
        success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id);
331
        //TODO TreeIndex
332
        success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id));
333
        success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id);
326
        success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
327
        success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
328
        success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
329
        success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
330

  
331
        success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
332
//TODO        success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
333
//FIXME sensu+auct. autoren       success &= equals("Taxon authority", srcRS.getString("authorshipCache"), destRS.getString("AuthorString"), id);
334
//FIXME sensu+auct. autoren        success &= equals("Taxon FullName", srcRS.getString("nameTitleCache"), destRS.getString("FullName"), id);
335
//TODO        success &= isNull("NomRefString", destRS);
336
//TODO        success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id);  //in ERMS according to SQL script same as FullName, no nom.ref. information attached
337

  
338
//TODO        success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
339
        success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
340

  
341
//        success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus")), nullSafeInt( destRS,"TaxonStatusFk"), id);
342
//        success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus")), destRS.getString("TaxonStatusCache"), id);
343

  
344
//        //TODO ParentTaxonFk
345
//        Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
346
//        success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
347
////TODO  success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
348
          //quality status, according to SQL always constant, could be changed in future
349
        success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
350
        success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
351
//        //TODO TreeIndex
352
          success &= isNull("FossilStatusFk", destRS);
353
          success &= isNull("FossilStatusCache", destRS);
334 354
        success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
335 355
        success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
336
        success &= isNull("ExpertGUID", destRS);  //only relevant after merge
337
        success &= isNull("ExpertName", destRS);  //only relevant after merge
338
        success &= isNull("SpeciesExpertGUID", destRS);  //only relevant after merge
339
        success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id);
340
        //LastAction(Date) handled in separate method
356
        success &= isNull("ExpertGUID", destRS);  //according to SQL + PESI2014
357
//        success &= isNull("ExpertName", destRS);
358
//        success &= isNull("SpeciesExpertGUID", destRS);
359
//      success &= isNull("SpeciesExpertName", destRS);  //only relevant after merge
360
//FIXME !!        success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
361
        success &= equals("Taxon Last Action", srcRS.getString("lastAction"),  destRS.getString("LastAction"), id);
362
        success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"),  destRS.getTimestamp("LastActionDate"), id);
363

  
341 364
        success &= isNull("GUID2", destRS);  //only relevant after merge
342 365
        success &= isNull("DerivedFromGuid2", destRS);  //only relevant after merge
343 366
        return success;
344 367
    }
345 368

  
346
    boolean namePartsFirst = true;
347
    private boolean compareNameParts(ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
348
        if (namePartsFirst){
349
            logger.warn("Validation of name parts not fully implemented (difficult). Currently validated via fullname");
350
            namePartsFirst = false;
351
        }
352
        int rankFk = srcRS.getInt("tu_rank");
353
        String genusOrUninomial = null;
354
        String infraGenericEpithet = null;
355
        String specificEpithet = null;
356
        String infraSpecificEpithet = null;
357
        if (rankFk <= 180){
358
            genusOrUninomial = srcRS.getString("tu_name");
359
        }else if (rankFk == 190){
360
            genusOrUninomial = srcRS.getString("tu1_name");
361
            infraGenericEpithet =  srcRS.getString("tu_name");
362
            //TODO does not work this way
363
//        }else if (rankFk == 220){
364
//            genusOrUninomial = destRS.getString("p_GenusOrUninomial");
365
//            infraGenericEpithet = destRS.getString("p_InfraGenericEpithet");
366
//            specificEpithet = srcRS.getString("tu_name");
367
        }else{
368
            //TODO exception
369
            return false;
369
    private String mapTaxStatus(String string) {
370
        if (string == null){
371
            return null;
372
        }else if ("Synonym".equals(string)){
373
            return "synonym";
374
        }else if ("Taxon".equals(string)){
375
            return "accepted";
370 376
        }
371
        boolean result = testEpis(destRS, genusOrUninomial, infraGenericEpithet,
372
                specificEpithet, infraSpecificEpithet, id);
373
        return result;
377
        return null;
374 378
    }
375 379

  
376
    private boolean testEpis(ResultSet destRS, String genusOrUninomial, String infraGenericEpithet, String specificEpithet,
377
            String infraSpecificEpithet, String id) throws SQLException {
378
        boolean result = equals("Taxon genusOrUninomial", genusOrUninomial, destRS.getString("GenusOrUninomial"), id) ;
379
        result &= equals("Taxon infraGenericEpithet", infraGenericEpithet, destRS.getString("InfraGenericEpithet"), id) ;
380
        result &= equals("Taxon specificEpithet", specificEpithet, destRS.getString("SpecificEpithet"), id) ;
381
        result &= equals("Taxon infraSpecificEpithet", infraSpecificEpithet, destRS.getString("InfraSpecificEpithet"), id) ;
382
        return result;
380
    private Integer mapTaxStatusFk(String string) {
381
        if (string == null){
382
            return null;
383
        }else if ("Synonym".equals(string)){
384
            return PesiTransformer.T_STATUS_SYNONYM;
385
        }else if ("Taxon".equals(string)){
386
            return PesiTransformer.T_STATUS_ACCEPTED;
387
        }
388
        return null;
383 389
    }
384 390

  
385
    private String normalizeRank(String string, ResultSet srcRS, String id) throws SQLException {
386
        String result = string
387
                .replace("Subforma", "Subform")
388
                .replace("Forma", "Form");
389
        int kingdomFk = Integer.valueOf(getSourceKingdomFk(srcRS, id));
390
        if (kingdomFk == 3 || kingdomFk == 4){
391
            result = result.replace("Subphylum", "Subdivision");
392
            result = result.replace("Phylum", "Division");
393
        }
394
        return result;
391
    private String normalizeRank(String rankStr, ResultSet srcRS, String id) throws SQLException {
392
        if (rankStr == null){return null;
393
        }else if (rankStr.equals("Convar")){return "Convariety";
394
        }else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
395
        }else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
396
        }else if (rankStr.equals("Coll. species")){return "Coll. Species";
397
        }else if (rankStr.equals("Species Aggregate")){return "Aggregate";
398
        }else if (rankStr.equals("Subsection bot.")){return "Subsection";
399
        }return rankStr;
395 400
    }
396 401

  
397 402
    //see also ErmsTaxonImport.getExpectedTitleCache()
......
451 456
        }
452 457
    }
453 458

  
454
    private boolean compareKingdom(String messageStart, ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
455
        String srcKingdom = getSourceKingdomFk(srcRS, id);
456
        Integer intDest = nullSafeInt(destRS, "KingdomFk");
457
        if (intDest == null){
458
            logger.warn(id +": " + messageStart + " must never be null for destination. Biota needs to be 0, all the rest needs to have >0 int value.");
459
            return false;
460
        }else{
461
            return equals(messageStart, srcKingdom, String.valueOf(intDest), id);
462
        }
463
    }
464

  
465
    private String getSourceKingdomFk(ResultSet srcRS, String id) throws SQLException {
466
        String strSrc = srcRS.getString("acc_sp");
467
        if (strSrc == null){
468
            strSrc = srcRS.getString("tu_sp");
469
        }
470
        if (strSrc == null){
471
            if ("1".equals(id)){
472
                strSrc = "0";  //Biota
473
            }else if ("147415".equals(id)){
474
                strSrc = "6";  //Monera is synonym of Bacteria
475
            }else{
476
                strSrc = id;
477
            }
478
        }else{
479
            strSrc = strSrc.substring(1);
480
            strSrc = strSrc.substring(0, strSrc.indexOf("#"));
481
        }
482
        return strSrc;
483
    }
484

  
485 459
    private boolean testSingleTaxonRelations(int n) throws SQLException {
486 460
        boolean success = true;
487 461
        ResultSet srcRS = source.getResultSet(""
488 462
                + " SELECT t.* "
489 463
                + " FROM tu t "
490 464
                + " WHERE tu_acctaxon <> id "
491
                + " ORDER BY CAST(t.id as nvarchar(20)) ");
465
                + " ORDER BY CAST(t.id as char(20)) ");
492 466
        ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
493 467
                + " FROM RelTaxon rel "
494 468
                + "    LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
......
518 492

  
519 493
    private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
520 494
        boolean success = true;
521
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
495
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
522 496
                + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
523 497
                + "    LEFT JOIN sources s ON s.id = MN.source_id "
524 498
                + "    LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
525
                + " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
499
                + " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id ");  //, no.note (not possible because ntext
526 500
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
527 501
                + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
528 502
                + "    INNER JOIN Source s ON s.SourceId = ats.SourceFk "
......
553 527

  
554 528
    private boolean testSingleNotes(int n) throws SQLException {
555 529
        boolean success = true;
556
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
530
        ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
557 531
                + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
558 532
                + "    LEFT JOIN languages l ON l.LanID = no.lan_id "
559
                + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
533
                + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable ");  //, no.note (not possible because ntext
560 534
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
561 535
                + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
562 536
                + "    LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
......
573 547
                + "   LEFT JOIN notes_sessions MN ON no.id = MN.note_id "
574 548
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
575 549
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
576
                + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
550
                + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
577 551

  
578 552
        while (srcRs.next() && destRs.next()){
579 553
            success &= testSingleNote(srcRs, destRs);
......
606 580

  
607 581
    private boolean testSingleDistributions(int n) throws SQLException {
608 582
        boolean success = true;
609
        ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId,"
610
                + " gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
611
                + " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
612
                + "    LEFT JOIN gu ON gu.id = dr.gu_id "
613
                + " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name, dr.noteSortable ");  //, dr.note (not possible because ntext
614
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
583
        ResultSet srcRs = source.getResultSet(
584
                  " SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
585
                + "        CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
586
                + "        CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
587
                + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
588
                + "    LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
589
                + "    LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
590
                + "    LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
591
                + distributionCountWhere
592
                + " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
593
        ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
615 594
                + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
616 595
                + "    LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
617 596
                + " WHERE t." + origEuroMed
618
                + " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
619
        ResultSet srcRsLastAction = source.getResultSet(""
620
                + " SELECT dr.id, s.sessiondate, a.action_name, s.ExpertName "
621
                + " FROM dr "
622
                + "   INNER JOIN tu ON tu.id = dr.tu_id "
623
                + "   LEFT JOIN gu ON gu.id = dr.gu_id "
624
                + "   LEFT JOIN dr_sessions MN ON dr.id = MN.dr_id "
625
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
626
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
627
                + " ORDER BY CAST(tu.id as nvarchar(20)), gu.gazetteer_id, gu.gu_name, s.sessiondate DESC, a.id DESC ");
597
                + " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
628 598
        int count = 0;
629 599
        while (srcRs.next() && destRs.next()){
630 600
            success &= testSingleDistribution(srcRs, destRs);
631
            //there are >1000 duplicates in dr, therefore this creates lots of warnings
632
            success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Distribution");
633 601
            count++;
634 602
        }
635 603
        success &= equals("Distribution count differs", n, count, "-1");
......
637 605
    }
638 606

  
639 607
    private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
640
        String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name"));
641
        boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
642
        success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id);
643
        success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id);
644
        success &= equals("Distribution area name cache", srcRs.getString("gu_name"), destRs.getString("AreaNameCache"), id);
645
        success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id);
646
        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
647
        //TODO see comments
648
        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
649
        success &= isNull("SourceCache", destRs);  //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
650
        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
651
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in ERMS
652
        //SpeciesExpertName,LastAction,LastActionDate handled in separate method
608
        String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
609
        boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
610
        success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
611
//        success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
612
        success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
613
        success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
614
//TODO        success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
615
        success &= isNull("SourceFk", destRs);  //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
616
        success &= isNull("SourceCache", destRs);  //sources should be moved to extra table, see above
617
//TODO        success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
618
        success &= isNull("SpeciesExpertGUID", destRs);  //SpeciesExpertGUID does not exist in EM and according to script
619
        success &= isNull("SpeciesExpertName", destRs);  //SpeciesExpertName does not exist in EM and according to script
620
        success &= equals("Distribution Last Action", srcRs.getString("lastAction"),  destRs.getString("LastAction"), id);
621
        success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"),  destRs.getTimestamp("LastActionDate"), id);
653 622
        return success;
654 623
    }
655 624

  
625
    /**
626
     * @param string
627
     * @return
628
     */
629
    private Integer mapStatus(String uuidStr) {
630
        UUID uuid = UUID.fromString(uuidStr);
631
        if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){  //native, reported in error
632
            return PesiTransformer.STATUS_ABSENT;
633
        }else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducesAdventitious)  //casual, introduced adventitious
634
                || uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
635
                || uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
636
            return PesiTransformer.STATUS_INTRODUCED;
637
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){  //native
638
            return PesiTransformer.STATUS_NATIVE;
639
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){  //naturalised
640
            return PesiTransformer.STATUS_NATURALISED;
641
        }else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){  //native, presence questionable
642
            return PesiTransformer.STATUS_DOUBTFUL;
643
        }else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){  //cultivated
644
            return PesiTransformer.STATUS_MANAGED;
645
        }else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){  //native, reported in error
646
            return -1;
647
        }
648

  
649
        return null;
650
    }
651

  
652
    private String normalizeDistrArea(String area) {
653
        if (area == null){
654
            return null;
655
        }else if ("France".equals(area)){return "French mainland";
656
        }else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
657
        }else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
658
        }else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
659
        }else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
660
        }else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
661
        }else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
662
        }else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
663
        }else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
664
        }else if ("Canary Is.".equals(area)){return "Canary Islands";
665
        }else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
666
        }else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
667
        }else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
668
        }else if ("Portugal".equals(area)){return "Portuguese mainland";
669
        }else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
670
        }else if ("Norway".equals(area)){return "Norwegian mainland";
671
        }else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
672
        }else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
673
        }else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
674
        }else if ("Ireland".equals(area)){return "Republic of Ireland";
675
        }else if ("France".equals(area)){return "French mainland";
676
        }
677
        return area;
678
    }
679

  
656 680
    private boolean testSingleCommonNames(int n) throws SQLException {
657 681
        boolean success = true;
658 682
        ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
659 683
                + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
660
                + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
684
                + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
661 685
        ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
662 686
                + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
663 687
                + " WHERE t." + origEuroMed
......
670 694
                + "   LEFT JOIN vernaculars_sessions MN ON v.id = MN.vernacular_id "
671 695
                + "   LEFT JOIN actions a ON a.id = MN.action_id "
672 696
                + "   LEFT JOIN sessions s ON s.id = MN.session_id  "
673
                + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
697
                + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
674 698
        int count = 0;
675 699
        while (srcRs.next() && destRs.next()){
676 700
            success &= testSingleCommonName(srcRs, destRs);
......
818 842

  
819 843
    private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
820 844
        if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
821
            String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
822
            logger.warn(message);
823
            return false;
845
            LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
846
            LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
847
            if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
848
                logger.info(messageStart + " were (almost) equal: " + srcDate);
849
                return true;
850
            }else{
851
                String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+  srcDate + "; Destination: " + destDate;
852
                logger.warn(message);
853
                return false;
854
            }
824 855
        }else{
825 856
            logger.info(messageStart + " were equal: " + srcDate);
826 857
            return true;
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/out/PesiTransformer.java
152 152
	private static int NAME_ST_SPECIES_INQUIRENDA = 28;
153 153

  
154 154
	// TaxonStatus
155
	private static int T_STATUS_ACCEPTED = 1;
156
	private static int T_STATUS_SYNONYM = 2;
155
	public static int T_STATUS_ACCEPTED = 1;
156
	public static int T_STATUS_SYNONYM = 2;
157 157
	private static int T_STATUS_PARTIAL_SYN = 3;
158 158
	private static int T_STATUS_PRO_PARTE_SYN = 4;
159 159
	private static int T_STATUS_UNRESOLVED = 5;
......
595 595

  
596 596
	// OccurrenceStatus
597 597
	private static int STATUS_PRESENT = 1;
598
	private static int STATUS_ABSENT = 2;
599
	private static int STATUS_NATIVE = 3;
600
	private static int STATUS_INTRODUCED = 4;
601
	private static int STATUS_NATURALISED = 5;
598
	public static int STATUS_ABSENT = 2;
599
	public static int STATUS_NATIVE = 3;
600
	public static int STATUS_INTRODUCED = 4;
601
	public static int STATUS_NATURALISED = 5;
602 602
	private static int STATUS_INVASIVE = 6;
603
	private static int STATUS_MANAGED = 7;
604
	private static int STATUS_DOUBTFUL = 8;
603
	public static int STATUS_MANAGED = 7;
604
	public static int STATUS_DOUBTFUL = 8;
605 605

  
606 606
	private final Map<String, Integer> tdwgKeyMap = new HashMap<>();
607 607
	private final Map<Integer, String> areaCacheMap = new HashMap<>();

Also available in: Unified diff