2 * Copyright (C) 2019 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.pesi
.erms
.validation
;
11 import java
.sql
.ResultSet
;
12 import java
.sql
.SQLException
;
13 import java
.sql
.Timestamp
;
15 import org
.apache
.commons
.lang
.StringUtils
;
16 import org
.apache
.log4j
.Logger
;
18 import eu
.etaxonomy
.cdm
.app
.pesi
.PesiDestinations
;
19 import eu
.etaxonomy
.cdm
.app
.pesi
.PesiSources
;
20 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
21 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
22 import eu
.etaxonomy
.cdm
.io
.pesi
.out
.PesiTransformer
;
25 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
30 public class PesiErmsValidator
{
32 private static final Logger logger
= Logger
.getLogger(PesiErmsValidator
.class);
34 private static final Source defaultSource
= PesiSources
.PESI2019_ERMS();
35 private static final Source defaultDestination
= PesiDestinations
.pesi_test_local_CDM_ERMS2PESI();
37 private Source source
= defaultSource
;
38 private Source destination
= defaultDestination
;
39 private String moneraFilter
= " NOT IN (-1)"; // 147415;
40 // private String moneraFilter = " NOT IN (147415)"; // 147415;
42 private String origErms
= "OriginalDB = 'ERMS' ";
44 public void invoke(Source source
, Source destination
){
45 logger
.warn("Validate destination " + destination
.getDatabase());
46 boolean success
= true;
49 this.destination
= destination
;
50 // success &= testReferences(); //ready, few minor issues to be discussed with VLIZ
51 success
&= testTaxa();
52 // success &= testTaxonRelations();
53 // success &= testCommonNames(); //source(s) discuss VLIZ, exact duplicates (except for sources), Anus(Korur)
54 // success &= testDistributions(); //>1000 duplicates in "dr", sources (OccurrenceSource table), area spellings(Baelt Sea), 1 long note
55 // success &= testNotes(); //ecology & link notes test (only count tested), sources untested (NoteSource table), few duplicates,
56 success
&= testAdditionalTaxonSources(); //ready
57 } catch (Exception e
) {
62 System
.out
.println("end validation " + (success?
"":"NOT ") + "successful.");
65 private boolean testAdditionalTaxonSources() throws SQLException
{
66 System
.out
.println("Start validate additional taxon sources");
67 boolean success
= testAdditionalTaxonSourcesCount();
69 success
&= testSingleAdditionalTaxonSources(source
.getUniqueInteger(countAddtionalTaxonSource
));
74 private boolean testNotes() throws SQLException
{
75 System
.out
.println("Start validate notes");
76 boolean success
= testNotesCount();
78 success
&= testSingleNotes(source
.getUniqueInteger("SELECT count(*) FROM notes "));
83 private boolean testDistributions() throws SQLException
{
84 System
.out
.println("Start validate distributions");
85 boolean success
= testDistributionCount();
87 success
&= testSingleDistributions(source
.getUniqueInteger("SELECT count(*) FROM dr "));
92 private boolean testCommonNames() throws SQLException
{
93 System
.out
.println("Start validate common names");
94 boolean success
= testCommonNameCount();
96 success
&= testSingleCommonNames(source
.getUniqueInteger("SELECT count(*) FROM vernaculars "));
101 private boolean testTaxonRelations() throws SQLException
{
102 System
.out
.println("Start validate taxon relations");
103 boolean success
= testTaxonRelationCount();
106 success
&= testSingleTaxonRelations(source
.getUniqueInteger(countSynonymRelation
));
111 private boolean testTaxa() throws SQLException
{
112 System
.out
.println("Start validate taxa");
113 boolean success
= testTaxaCount();
115 success
&= testSingleTaxa(source
.getUniqueInteger(countTaxon
));
120 private boolean testReferences() throws SQLException
{
121 System
.out
.println("Start validate references");
122 boolean success
= testReferenceCount();
124 success
&= testSingleReferences();
129 private final String countAddtionalTaxonSource
= "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id " + moneraFilter
;
130 private boolean testAdditionalTaxonSourcesCount() {
131 int countSrc
= source
.getUniqueInteger(countAddtionalTaxonSource
);
132 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
133 return equals("AdditionalTaxonSource count ", countSrc
, countDest
, String
.valueOf(-1));
136 private boolean testNotesCount() {
137 int countSrc
= source
.getUniqueInteger("SELECT count(*) FROM notes ");
138 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Note "
139 + " WHERE NOT (NoteCategoryFk = 4 AND LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) ");
140 boolean result
= equals("Notes count ", countSrc
, countDest
, String
.valueOf(-1));
142 countSrc
= source
.getUniqueInteger("SELECT count(*) FROM tu "
143 + " WHERE (tu_marine IS NOT NULL OR tu_brackish IS NOT NULL OR tu_fresh IS NOT NULL OR tu_terrestrial IS NOT NULL) "
144 + " AND tu.id " + moneraFilter
);
145 countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Note "
146 + " WHERE (NoteCategoryFk = 4 AND LastAction IS NULL) ");
147 result
&= equals("Notes ecology count ", countSrc
, countDest
, String
.valueOf(-1));
149 countSrc
= source
.getUniqueInteger("SELECT count(*) FROM links ");
150 countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Note "
151 + " WHERE NoteCategoryFk IN (22,23,24) ");
152 result
&= equals("Notes link count ", countSrc
, countDest
, String
.valueOf(-1));
157 private boolean testDistributionCount() {
158 int countSrc
= source
.getUniqueInteger("SELECT count(*) FROM dr ");
159 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Occurrence ");
160 return equals("Occurrence count ", countSrc
, countDest
, String
.valueOf(-1));
163 private boolean testCommonNameCount() {
164 int countSrc
= source
.getUniqueInteger("SELECT count(*) FROM vernaculars ");
165 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM CommonName ");
166 return equals("CommonName count ", countSrc
, countDest
, String
.valueOf(-1));
169 private final String countSynonymRelation
= "SELECT count(*) FROM tu WHERE tu_acctaxon <> id AND id " + moneraFilter
;
170 private final String countParentRelation
= "SELECT count(*) FROM tu WHERE tu_parent is not null and tu_status = 1 and id <> tu_parent AND id " + moneraFilter
;
171 private boolean testTaxonRelationCount() {
173 int countSrc
= source
.getUniqueInteger(countSynonymRelation
);
175 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
176 boolean result
= equals("Synonym count ", countSrc
, countDest
, String
.valueOf(-1));
177 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently placed%'
178 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently held%'
179 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'sy%' or tu_unacceptreason like '%jun%syn%'
180 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = '(synonym)'
181 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = 'reverted genus transfer'
182 // update Match_RelStat set RelTaxon = 103 where tu_unacceptreason like 'misapplied%'
183 // update Match_RelStat set RelTaxon = 104 where tu_unacceptreason like 'part% synonym%'
184 // update Match_RelStat set RelTaxon = 106 where tu_unacceptreason = 'heterotypic synonym' or tu_unacceptreason = 'subjective synonym'
185 // update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%homot%syn%' or tu_unacceptreason = 'objective synonym' synyonym
186 // update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%bas[iy][no]%ny%'
189 countSrc
= source
.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter
+ " AND ("
190 + " tu_unacceptreason like '%bas[iy][no]%ny%' OR tu_unacceptreason = 'original combination' "
191 + " OR tu_unacceptreason = 'Subsequent combination' OR tu_unacceptreason like '%genus transfer%' "
192 + " OR tu_unacceptreason = 'genus change' " //1
193 + " OR tu_unacceptreason like '%homon%' " // 2
194 + " OR tu_unacceptreason like '%spell%' OR tu_unacceptreason like 'lapsus %' " //16
197 countDest
= destination
.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
198 result
= equals("Taxon name relation count ", countSrc
, countDest
, String
.valueOf(-1));
201 countSrc
= source
.getUniqueInteger(countParentRelation
);
203 countDest
= destination
.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
204 result
&= equals("Tax included in count ", countSrc
, countDest
, String
.valueOf(-1));
209 private final String countTaxon
= "SELECT count(*) FROM tu WHERE id " + moneraFilter
;
210 private boolean testTaxaCount() {
211 int countSrc
= source
.getUniqueInteger(countTaxon
);
212 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Taxon ");
213 boolean result
= equals("Taxon count ", countSrc
, countDest
, String
.valueOf(-1));
216 countSrc
= source
.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter
+ " AND ("
217 + " tu_unacceptreason like '%inval%' OR tu_unacceptreason like '%not val%' "
218 + " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
219 + " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
220 + " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
221 + " OR tu_unacceptreason = 'new combination' "
222 + " OR tu_status IN (3,5,6,7,8) )");
223 countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
224 result
= equals("Taxon name status count ", countSrc
, countDest
, String
.valueOf(-1));
229 private boolean testSingleTaxa(int n
) throws SQLException
{
230 boolean success
= true;
231 ResultSet srcRS
= source
.getResultSet(""
232 + " SELECT t.*, tu1.tu_name tu1_name, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
233 + " type.tu_displayname typename, type.tu_authority typeauthor, "
234 + " fo.fossil_name, qs.qualitystatus_name "
236 + " LEFT JOIN tu as tu1 on t.tu_parent = tu1.id"
237 + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 40 AND rank_name = 'Subdivision' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
238 + " LEFT JOIN tu acc ON acc.id = t.tu_acctaxon "
239 + " LEFT JOIN status st ON st.status_id = t.tu_status "
240 + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
241 + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
242 + " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
243 + " WHERE t.id " + moneraFilter
244 + " ORDER BY CAST(t.id as nvarchar(20)) ");
245 ResultSet destRS
= destination
.getResultSet("SELECT t.*, "
246 + " pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
247 + " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
249 + " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
250 + " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
251 + " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
252 + " LEFT JOIN Source s ON s.SourceId = t.SourceFk "
253 + " WHERE t."+ origErms
254 + " ORDER BY t.IdInSource");
255 ResultSet srcRsLastAction
= source
.getResultSet(""
256 + " SELECT t.id, s.sessiondate, a.action_name, s.ExpertName "
258 + " LEFT OUTER JOIN tu_sessions MN ON t.id = MN.tu_id "
259 + " LEFT JOIN actions a ON a.id = MN.action_id "
260 + " LEFT JOIN sessions s ON s.id = MN.session_id "
261 + " ORDER BY CAST(t.id as nvarchar(20)), s.sessiondate DESC, a.id DESC ");
263 while (srcRS
.next() && destRS
.next()){
264 success
&= testSingleTaxon(srcRS
, destRS
);
265 //TODO success &= testLastAction(srcRsLastAction, destRS, String.valueOf(srcRS.getInt("id")), "Taxon");
268 success
&= equals("Taxon count for single compare", n
, i
, String
.valueOf(-1));
273 private boolean testSingleTaxon(ResultSet srcRS
, ResultSet destRS
) throws SQLException
{
274 String id
= String
.valueOf(srcRS
.getInt("id"));
275 boolean success
= equals("Taxon ID", "tu_id: " + srcRS
.getInt("id"), destRS
.getString("IdInSource"), id
);
276 success
&= equals("Taxon source", "ERMS export for PESI", destRS
.getString("sourceName"), id
);
277 //TODO some success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
278 success
&= equals("Taxon rank fk", srcRS
.getString("tu_rank"), destRS
.getString("RankFk"), id
);
279 success
&= equals("Taxon rank cache", normalizeRank(srcRS
.getString("rank_name"), srcRS
, id
), destRS
.getString("Rank"), id
);
280 success
&= compareNameParts(srcRS
, destRS
, id
);
282 success
&= equals("Taxon websearchname", srcRS
.getString("tu_displayname"), destRS
.getString("WebSearchName"), id
);
283 // success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
284 success
&= equals("Taxon authority", srcRS
.getString("tu_authority"), destRS
.getString("AuthorString"), id
);
285 // success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
286 success
&= isNull("NomRefString", destRS
);
287 // success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached
289 //TODO success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
290 //TODO success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
292 //TODO success &= equals("Taxon TaxonStatusFk", nullSafeInt(srcRS, "tu_status"),nullSafeInt( destRS,"TaxonStatusFk"), id);
293 //TODO success &= equals("Taxon TaxonStatusCache", srcRS.getString("status_name"), destRS.getString("TaxonStatusCache"), id);
296 Integer orgigTypeNameFk
= nullSafeInt(srcRS
, "tu_typetaxon");
297 success
&= equals("Taxon TypeNameFk", orgigTypeNameFk
== null?
null : "tu_id: " + orgigTypeNameFk
, destRS
.getString("typeSourceId"), id
);
298 //TODO success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
299 success
&= equals("Taxon QualityStatusFK", nullSafeInt(srcRS
, "tu_qualitystatus"),nullSafeInt( destRS
,"QualityStatusFk"), String
.valueOf(id
));
300 success
&= equals("Taxon QualityStatusCache", srcRS
.getString("qualitystatus_name"), destRS
.getString("QualityStatusCache"), id
);
302 success
&= equals("Taxon FossilStatusFk", nullSafeInt(srcRS
, "tu_fossil"),nullSafeInt( destRS
,"FossilStatusFk"), String
.valueOf(id
));
303 success
&= equals("Taxon FossilStatusCache", srcRS
.getString("fossil_name"), destRS
.getString("FossilStatusCache"), id
);
304 success
&= equals("Taxon GUID", srcRS
.getString("GUID"), destRS
.getString("GUID"), id
);
305 success
&= equals("Taxon DerivedFromGuid", srcRS
.getString("GUID"), destRS
.getString("DerivedFromGuid"), id
); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
306 success
&= isNull("ExpertGUID", destRS
); //only relevant after merge
307 success
&= isNull("ExpertName", destRS
); //only relevant after merge
308 success
&= isNull("SpeciesExpertGUID", destRS
); //only relevant after merge
309 success
&= equals("Taxon cache citation", srcRS
.getString("cache_citation"), destRS
.getString("CacheCitation"), id
);
310 //LastAction(Date) handled in separate method
311 success
&= isNull("GUID2", destRS
); //only relevant after merge
312 success
&= isNull("DerivedFromGuid2", destRS
); //only relevant after merge
316 boolean namePartsFirst
= true;
317 private boolean compareNameParts(ResultSet srcRS
, ResultSet destRS
, String id
) throws SQLException
{
319 logger
.warn("Validation of name parts not fully implemented (difficult). Currently validated via fullname");
320 namePartsFirst
= false;
322 int rankFk
= srcRS
.getInt("tu_rank");
323 String genusOrUninomial
= null;
324 String infraGenericEpithet
= null;
325 String specificEpithet
= null;
326 String infraSpecificEpithet
= null;
328 genusOrUninomial
= srcRS
.getString("tu_name");
329 }else if (rankFk
== 190){
330 genusOrUninomial
= srcRS
.getString("tu1_name");
331 infraGenericEpithet
= srcRS
.getString("tu_name");
332 //TODO does not work this way
333 // }else if (rankFk == 220){
334 // genusOrUninomial = destRS.getString("p_GenusOrUninomial");
335 // infraGenericEpithet = destRS.getString("p_InfraGenericEpithet");
336 // specificEpithet = srcRS.getString("tu_name");
341 boolean result
= testEpis(destRS
, genusOrUninomial
, infraGenericEpithet
,
342 specificEpithet
, infraSpecificEpithet
, id
);
346 private boolean testEpis(ResultSet destRS
, String genusOrUninomial
, String infraGenericEpithet
, String specificEpithet
,
347 String infraSpecificEpithet
, String id
) throws SQLException
{
348 boolean result
= equals("Taxon genusOrUninomial", genusOrUninomial
, destRS
.getString("GenusOrUninomial"), id
) ;
349 result
&= equals("Taxon infraGenericEpithet", infraGenericEpithet
, destRS
.getString("InfraGenericEpithet"), id
) ;
350 result
&= equals("Taxon specificEpithet", specificEpithet
, destRS
.getString("SpecificEpithet"), id
) ;
351 result
&= equals("Taxon infraSpecificEpithet", infraSpecificEpithet
, destRS
.getString("InfraSpecificEpithet"), id
) ;
355 private String
normalizeRank(String string
, ResultSet srcRS
, String id
) throws SQLException
{
356 String result
= string
357 .replace("Subforma", "Subform")
358 .replace("Forma", "Form");
359 int kingdomFk
= Integer
.valueOf(getSourceKingdomFk(srcRS
, id
));
360 if (kingdomFk
== 3 || kingdomFk
== 4){
361 result
= result
.replace("Subphylum", "Subdivision");
362 result
= result
.replace("Phylum", "Division");
367 //see also ErmsTaxonImport.getExpectedTitleCache()
368 private String
srcFullName(ResultSet srcRs
) throws SQLException
{
369 String result
= null;
370 String epi
= srcRs
.getString("tu_name");
372 String display
= srcRs
.getString("tu_displayname");
373 String sp
= srcRs
.getString("tu_sp");
374 if (display
.indexOf(epi
) != display
.lastIndexOf(epi
) && !sp
.startsWith("#2#")){ //homonym, animal
375 result
= srcRs
.getString("tu_displayname").replaceFirst(epi
+" ", CdmUtils
.concat(" ", " "+epi
, srcRs
.getString("tu_authority")))+" ";
377 result
= CdmUtils
.concat(" ", srcRs
.getString("tu_displayname"), srcRs
.getString("tu_authority"));
382 private String
srcDisplayName(ResultSet srcRs
) throws SQLException
{
383 String result
= null;
384 String epi
= srcRs
.getString("tu_name");
386 String display
= "<i>"+srcRs
.getString("tu_displayname")+"</i>";
387 display
= display
.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>");
388 String sp
= srcRs
.getString("tu_sp");
389 if (display
.indexOf(epi
) != display
.lastIndexOf(epi
) && !sp
.startsWith("#2#")){ //homonym, animal
390 result
= display
.replaceFirst(epi
+" ", CdmUtils
.concat(" ", " "+epi
, srcRs
.getString("tu_authority")))+" ";
392 result
= CdmUtils
.concat(" ", display
, srcRs
.getString("tu_authority"));
397 String lastLastActionId
= "-1";
398 private boolean testLastAction(ResultSet srcRsLastAction
, ResultSet destRs
, String id
, String table
) throws SQLException
{
400 boolean success
= true;
402 while (srcRsLastAction
.next()){
403 srcId
= String
.valueOf(srcRsLastAction
.getInt("id"));
404 if (!lastLastActionId
.equals(srcId
)){
405 lastLastActionId
= srcId
;
409 if(!id
.equals(srcId
)){
410 logger
.warn("Last Action SourceIDs are not equal: id: " +id
+ ", la-id: " + srcId
);
412 String destStr
= destRs
.getString("LastAction");
413 success
&= equals(table
+ " SpeciesExpertName", srcRsLastAction
.getString("ExpertName"), destRs
.getString("SpeciesExpertName"), id
); //mapping ExpertName => SpeciesExpertName according to SQL script
414 success
&= equals(table
+ " Last Action", srcRsLastAction
.getString("action_name"), destStr
== null?
null : destStr
, id
);
415 success
&= equals(table
+ " Last Action Date", srcRsLastAction
.getTimestamp("sessiondate"), destRs
.getTimestamp("LastActionDate"), id
);
418 } catch (Exception e
) {
424 private boolean compareKingdom(String messageStart
, ResultSet srcRS
, ResultSet destRS
, String id
) throws SQLException
{
425 String srcKingdom
= getSourceKingdomFk(srcRS
, id
);
426 Integer intDest
= nullSafeInt(destRS
, "KingdomFk");
427 if (intDest
== null){
428 logger
.warn(id
+": " + messageStart
+ " must never be null for destination. Biota needs to be 0, all the rest needs to have >0 int value.");
431 return equals(messageStart
, srcKingdom
, String
.valueOf(intDest
), id
);
435 private String
getSourceKingdomFk(ResultSet srcRS
, String id
) throws SQLException
{
436 String strSrc
= srcRS
.getString("acc_sp");
438 strSrc
= srcRS
.getString("tu_sp");
441 strSrc
= "1".equals(id
)?
"0":id
;
443 strSrc
= strSrc
.substring(1);
444 strSrc
= strSrc
.substring(0, strSrc
.indexOf("#"));
449 private boolean testSingleTaxonRelations(int n
) throws SQLException
{
450 boolean success
= true;
451 ResultSet srcRS
= source
.getResultSet(""
454 + " WHERE t.id "+ moneraFilter
+ " AND tu_acctaxon <> id "
455 + " ORDER BY CAST(t.id as nvarchar(20)) ");
456 ResultSet destRS
= destination
.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
457 + " FROM RelTaxon rel "
458 + " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
459 + " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
460 + " WHERE t1."+origErms
+" AND t2." + origErms
461 + " ORDER BY t1.IdInSource");
463 while (srcRS
.next() && destRS
.next()){
464 success
&= testSingleTaxonRelation(srcRS
, destRS
);
467 success
&= equals("Taxon relation count for single compare", n
, i
, String
.valueOf(-1));
471 private boolean testSingleTaxonRelation(ResultSet srcRS
, ResultSet destRS
) throws SQLException
{
472 String id
= String
.valueOf(srcRS
.getInt("id"));
473 boolean success
= equals("Taxon relation taxon1", "tu_id: " + srcRS
.getInt("id"), destRS
.getString("t1Id"), id
);
474 success
&= equals("Taxon relation taxon2", "tu_id: " + srcRS
.getInt("tu_acctaxon"), destRS
.getString("t2Id"), id
);
475 success
&= equals("Taxon relation qualifier fk", PesiTransformer
.IS_SYNONYM_OF
, destRS
.getInt("RelTaxonQualifierFk"), id
);
476 success
&= equals("Taxon relation qualifier cache", "is synonym of", destRS
.getString("RelQualifierCache"), id
);
477 //TODO enable after next import
478 // success &= isNull("notes", destRS);
479 //complete if no further relations need to added
483 private boolean testSingleAdditionalTaxonSources(int n
) throws SQLException
{
484 boolean success
= true;
485 ResultSet srcRs
= source
.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
486 + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
487 + " LEFT JOIN sources s ON s.id = MN.source_id "
488 + " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
489 + " WHERE MN.tu_id " + moneraFilter
490 + " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext
491 ResultSet destRs
= destination
.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
492 + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
493 + " INNER JOIN Source s ON s.SourceId = ats.SourceFk "
494 + " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
495 + " WHERE t."+origErms
496 + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
498 while (srcRs
.next() && destRs
.next()){
499 success
&= testSingleAdditionalTaxonSource(srcRs
, destRs
);
502 success
&= equals("Notes count differs", n
, count
, "-1");
506 private boolean testSingleAdditionalTaxonSource(ResultSet srcRs
, ResultSet destRs
) throws SQLException
{
507 String id
= String
.valueOf(srcRs
.getInt("tuId") + "-" + srcRs
.getString("sourceuse_name"));
508 boolean success
= equals("Additional taxon source taxonID ", "tu_id: " + String
.valueOf(srcRs
.getInt("tuId")), destRs
.getString("IdInSource"), id
);
509 success
&= equals("Additional taxon source fk ", srcRs
.getString("source_id"), destRs
.getString("RefIdInSource"), id
); //currently we use the same id in ERMS and PESI
510 success
&= equals("Additional taxon source use fk ", srcRs
.getString("sourceuse_id"), destRs
.getString("SourceUseFk"), id
);
511 success
&= equals("Additional taxon source use cache ", srcRs
.getString("sourceuse_name"), destRs
.getString("SourceUseCache"), id
);
512 //TODO some records are still truncated ~ >820 characters
513 success
&= equals("Additional taxon source name cache ", srcRs
.getString("source_name"), destRs
.getString("SourceNameCache"), id
);
514 success
&= equals("Additional taxon source detail ", srcRs
.getString("pagenr"), destRs
.getString("SourceDetail"), id
);
519 private boolean testSingleNotes(int n
) throws SQLException
{
520 boolean success
= true;
521 ResultSet srcRs
= source
.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
522 + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
523 + " LEFT JOIN languages l ON l.LanID = no.lan_id "
524 + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable "); //, no.note (not possible because ntext
525 ResultSet destRs
= destination
.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
526 + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
527 + " LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
528 + " LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
529 + " WHERE t." + origErms
530 + " AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
531 + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1 ");
533 ResultSet srcRsLastAction
= source
.getResultSet(""
534 + " SELECT no.id, s.sessiondate, a.action_name, s.ExpertName "
536 + " INNER JOIN tu ON tu.id = no.tu_id "
537 + " LEFT JOIN languages l ON l.LanID = no.lan_id"
538 + " LEFT JOIN notes_sessions MN ON no.id = MN.note_id "
539 + " LEFT JOIN actions a ON a.id = MN.action_id "
540 + " LEFT JOIN sessions s ON s.id = MN.session_id "
541 + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
543 while (srcRs
.next() && destRs
.next()){
544 success
&= testSingleNote(srcRs
, destRs
);
545 success
&= testLastAction(srcRsLastAction
, destRs
, String
.valueOf(srcRs
.getInt("id")), "Note");
548 success
&= equals("Notes count differs", n
, count
, "-1");
552 private boolean testSingleNote(ResultSet srcRs
, ResultSet destRs
) throws SQLException
{
553 String id
= String
.valueOf(srcRs
.getInt("tuId") + "-" + srcRs
.getString("type"));
554 boolean success
= equals("Note taxonID ", "tu_id: " + String
.valueOf(srcRs
.getInt("tuId")), destRs
.getString("IdInSource"), id
);
555 success
&= equals("Note Note_1 ", srcRs
.getString("note"), destRs
.getString("Note_1"), id
);
556 success
&= isNull("Note_2", destRs
);
557 success
&= equals("Note category cache", normalizeNoteCatCache(srcRs
.getString("type")), destRs
.getString("NoteCategoryCache"), id
);
558 success
&= equals("Note language ", srcRs
.getString("LanName"), destRs
.getString("Language"), id
);
559 success
&= isNull("Region", destRs
);
560 success
&= isNull("SpeciesExpertGUID", destRs
);
561 //SpeciesExpertName, LastAction, LastActionDate handled in separate method
566 private String
normalizeNoteCatCache(String string
) {
567 return StringUtils
.capitalize(string
)
568 .replace("Original Combination", "Original combination")
569 .replace("Taxonomic remark", "Taxonomic Remark");
572 private boolean testSingleDistributions(int n
) throws SQLException
{
573 boolean success
= true;
574 ResultSet srcRs
= source
.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId,"
575 + " gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
576 + " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
577 + " LEFT JOIN gu ON gu.id = dr.gu_id "
578 + " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name, dr.noteSortable "); //, dr.note (not possible because ntext
579 ResultSet destRs
= destination
.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
580 + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
581 + " LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
582 + " WHERE t." + origErms
583 + " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
584 ResultSet srcRsLastAction
= source
.getResultSet(""
585 + " SELECT dr.id, s.sessiondate, a.action_name, s.ExpertName "
587 + " INNER JOIN tu ON tu.id = dr.tu_id "
588 + " LEFT JOIN gu ON gu.id = dr.gu_id "
589 + " LEFT JOIN dr_sessions MN ON dr.id = MN.dr_id "
590 + " LEFT JOIN actions a ON a.id = MN.action_id "
591 + " LEFT JOIN sessions s ON s.id = MN.session_id "
592 + " ORDER BY CAST(tu.id as nvarchar(20)), gu.gazetteer_id, gu.gu_name, s.sessiondate DESC, a.id DESC ");
594 while (srcRs
.next() && destRs
.next()){
595 success
&= testSingleDistribution(srcRs
, destRs
);
596 //there are >1000 duplicates in dr, therefore this creates lots of warnings
597 success
&= testLastAction(srcRsLastAction
, destRs
, String
.valueOf(srcRs
.getInt("id")), "Distribution");
600 success
&= equals("Distribution count differs", n
, count
, "-1");
604 private boolean testSingleDistribution(ResultSet srcRs
, ResultSet destRs
) throws SQLException
{
605 String id
= String
.valueOf(srcRs
.getInt("tuId") + "-" + srcRs
.getString("gu_name"));
606 boolean success
= equals("Distribution taxonID ", "tu_id: " + String
.valueOf(srcRs
.getInt("tuId")), destRs
.getString("IdInSource"), id
);
607 success
&= equals("Distribution gazetteer_id ", srcRs
.getString("gazetteer_id"), destRs
.getString("AreaERMSGazetteerId"), id
);
608 success
&= equals("Distribution area name ", srcRs
.getString("gu_name"), destRs
.getString("AreaName"), id
);
609 success
&= equals("Distribution area name cache", srcRs
.getString("gu_name"), destRs
.getString("AreaNameCache"), id
);
610 success
&= equals("Distribution OccurrenceStatusFk", 1, destRs
.getInt("OccurrenceStatusFk"), id
);
611 success
&= equals("Distribution OccurrenceStatusCache", "Present", destRs
.getString("OccurrenceStatusCache"), id
);
613 success
&= isNull("SourceFk", destRs
); //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
614 success
&= isNull("SourceCache", destRs
); //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
615 success
&= equals("Distribution notes ", srcRs
.getString("note"), destRs
.getString("Notes"), id
);
616 success
&= isNull("SpeciesExpertGUID", destRs
); //SpeciesExpertGUID does not exist in ERMS
617 //SpeciesExpertName,LastAction,LastActionDate handled in separate method
621 private boolean testSingleCommonNames(int n
) throws SQLException
{
622 boolean success
= true;
623 ResultSet srcRs
= source
.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
624 + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
625 + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
626 ResultSet destRs
= destination
.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
627 + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
628 + " WHERE t." + origErms
629 + " ORDER BY t.IdInSource, ISNULL("+preferredISO639
+", "+alternativeISO639
+"), cn.CommonName, cn.LastActionDate "); //sorting also lastActionDate results in a minimum of exact duplicate problems
630 ResultSet srcRsLastAction
= source
.getResultSet(""
631 + " SELECT v.id, s.sessiondate, a.action_name, s.ExpertName "
632 + " FROM vernaculars v "
633 + " INNER JOIN tu ON tu.id = v.tu_id "
634 + " LEFT JOIN languages l ON l.LanID = v.lan_id"
635 + " LEFT JOIN vernaculars_sessions MN ON v.id = MN.vernacular_id "
636 + " LEFT JOIN actions a ON a.id = MN.action_id "
637 + " LEFT JOIN sessions s ON s.id = MN.session_id "
638 + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
640 while (srcRs
.next() && destRs
.next()){
641 success
&= testSingleCommonName(srcRs
, destRs
);
642 success
&= testLastAction(srcRsLastAction
, destRs
, String
.valueOf(srcRs
.getInt("id")), "CommonName");
645 success
&= equals("Common name count differs", n
, count
, "-1");
649 boolean prefer639_3
= true;
650 String preferredISO639
= prefer639_3?
"ISO639_3":"ISO639_2";
651 String alternativeISO639
= prefer639_3?
"ISO639_2":"ISO639_3";
653 private boolean testSingleCommonName(ResultSet srcRs
, ResultSet destRs
) throws SQLException
{
654 String id
= String
.valueOf(srcRs
.getInt("tuId") + "-" + srcRs
.getString("lan_id"));
655 boolean success
= equals("Common name taxonID ", "tu_id: " + String
.valueOf(srcRs
.getInt("tuId")), destRs
.getString("IdInSource"), id
);
656 success
&= equals("CommonName name ", srcRs
.getString("vername"), destRs
.getString("CommonName"), id
);
657 success
&= equals("Common name languageFk ", srcRs
.getString("iso"), getLanguageIso(destRs
), id
);
658 success
= equals("CommonName LanguageCache ", normalizeLang(srcRs
.getString("LanName")), destRs
.getString("LanguageCache"), id
);
659 //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
660 success
&= isNull("Region", destRs
); //region does not seem to exist in ERMS
662 // success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014
663 // success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014
664 success
&= isNull("SpeciesExpertGUID", destRs
); //SpeciesExpertGUID does not exist in ERMS
665 //SpeciesExpertName,LastAction,LastActionDate handled in separate method
670 private String
normalizeLang(String string
) {
671 if ("Spanish".equals(string
)){
672 return "Spanish, Castillian";
673 }else if ("Modern Greek (1453-)".equals(string
)){
675 }else if ("Malay (individual language)".equals(string
)){
677 }else if ("Swahili (individual language)".equals(string
)){
684 private String
getLanguageIso(ResultSet destRs
) throws SQLException
{
685 String result
= destRs
.getString(preferredISO639
);
687 result
= destRs
.getString(alternativeISO639
);
692 private boolean testSingleReferences() throws SQLException
{
693 boolean success
= true;
694 ResultSet srcRS
= source
.getResultSet("SELECT s.* FROM sources s ORDER BY s.id ");
695 ResultSet destRS
= destination
.getResultSet("SELECT s.* FROM Source s "
696 + " WHERE s." + origErms
697 + " ORDER BY s.RefIdInSource "); // +1 for the source reference "erms" but this has no OriginalDB
698 while (srcRS
.next() && destRS
.next()){
699 success
&= testSingleReference(srcRS
, destRS
);
704 private boolean testSingleReference(ResultSet srcRS
, ResultSet destRS
) throws SQLException
{
705 String id
= String
.valueOf(srcRS
.getInt("id"));
706 boolean success
= equals("Reference ID ", srcRS
.getInt("id"), destRS
.getInt("RefIdInSource"), id
);
707 success
&= equals("Reference IMIS_id ", srcRS
.getString("imis_id"), destRS
.getString("IMIS_Id"), id
);
708 success
&= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS
.getString("source_type")), destRS
.getInt("SourceCategoryFk"), id
);
709 success
&= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS
.getString("source_type")), destRS
.getString("SourceCategoryCache"), id
);
710 success
&= equals("Reference name ", srcRS
.getString("source_name"), destRS
.getString("Name"), id
);
711 success
&= equals("Reference abstract ", srcRS
.getString("source_abstract"), destRS
.getString("Abstract"), id
);
712 success
&= equals("Reference title ", srcRS
.getString("source_title"), destRS
.getString("Title"), id
);
713 success
&= equals("Reference author string ", srcRS
.getString("source_author"), destRS
.getString("AuthorString"), id
);
714 success
&= equals("Reference year ", normalizeYear(srcRS
.getString("source_year")), destRS
.getString("RefYear"), id
);
715 success
&= isNull("NomRefCache", destRS
); //for ERMS no other value was found in 2014 value
716 success
&= equals("Reference link ", srcRS
.getString("source_link"), destRS
.getString("Link"), id
);
717 success
&= equals("Reference note ", srcRS
.getString("source_note"), destRS
.getString("Notes"), id
);
722 private Integer
convertSourceTypeFk(String sourceType
) {
723 if (sourceType
== null){
725 }else if ("d".equals(sourceType
)){
727 }else if ("e".equals(sourceType
)){
729 }else if ("p".equals(sourceType
)){
731 }else if ("i".equals(sourceType
)){
736 private String
convertSourceTypeCache(String sourceType
) {
737 if (sourceType
== null){
739 }else if ("d".equals(sourceType
)){
741 }else if ("e".equals(sourceType
)){
742 return "informal reference";
743 }else if ("p".equals(sourceType
)){
744 return "publication";
745 }else if ("i".equals(sourceType
)){
752 private boolean testReferenceCount() {
753 int countSrc
= source
.getUniqueInteger("SELECT count(*) FROM sources ");
754 int countDest
= destination
.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origErms
); // +1 for the source reference "erms" but this has no OriginalDB
755 boolean success
= equals("Reference count ", countSrc
, countDest
, "-1");
759 private String
normalizeYear(String yearStr
) {
760 if (StringUtils
.isBlank(yearStr
)){
763 yearStr
= yearStr
.trim();
764 if (yearStr
.matches("\\d{4}-\\d{2}")){
765 yearStr
= yearStr
.substring(0, 5)+yearStr
.substring(0, 2)+yearStr
.substring(5);
770 private boolean isNull(String attrName
, ResultSet destRS
) throws SQLException
{
771 Object value
= destRS
.getObject(attrName
);
773 String message
= attrName
+ " was expected to be null but was: " + value
.toString();
774 logger
.warn(message
);
777 logger
.info(attrName
+ " was null as expected");
782 private boolean equals(String messageStart
, Timestamp srcDate
, Timestamp destDate
, String id
) {
783 if (!CdmUtils
.nullSafeEqual(srcDate
, destDate
)){
784 String message
= id
+ ": " + messageStart
+ " must be equal, but was not.\n Source: "+ srcDate
+ "; Destination: " + destDate
;
785 logger
.warn(message
);
788 logger
.info(messageStart
+ " were equal: " + srcDate
);
793 private boolean equals(String messageStart
, Integer nSrc
, Integer nDest
, String id
) {
794 String strId
= id
.equals("-1")?
"": (id
+ ": ");
795 if (!CdmUtils
.nullSafeEqual(nSrc
,nDest
)){
796 String message
= strId
+ messageStart
+ " must be equal, but was not.\n Source: "+ nSrc
+ "; Destination: " + nDest
;
797 logger
.warn(message
);
800 logger
.info(strId
+ messageStart
+ " were equal: " + nSrc
);
805 private boolean equals(String messageStart
, String strSrc
, String strDest
, String id
) {
806 if (StringUtils
.isBlank(strSrc
)){
809 strSrc
= strSrc
.trim();
811 //we do not trim strDest here because this should be done during import already. If not it should be shown here
812 if (!CdmUtils
.nullSafeEqual(strSrc
, strDest
)){
813 int index
= diffIndex(strSrc
, strDest
);
814 String message
= id
+ ": " + messageStart
+ " must be equal, but was not at "+index
+".\n Source: "+ strSrc
+ "\n Destination: " + strDest
;
815 logger
.warn(message
);
818 logger
.info(id
+ ": " + messageStart
+ " were equal: " + strSrc
);
823 private int diffIndex(String strSrc
, String strDest
) {
824 if (strSrc
== null || strDest
== null){
828 for (i
= 0; i
<strSrc
.length() && i
<strDest
.length() ;i
++) {
829 if (strSrc
.charAt(i
)!= strDest
.charAt(i
)){
833 if(strSrc
.length()!=strDest
.length()){
834 return Math
.max(strSrc
.length(), strDest
.length());
839 protected Integer
nullSafeInt(ResultSet rs
, String columnName
) throws SQLException
{
840 Object intObject
= rs
.getObject(columnName
);
841 if (intObject
== null){
844 return Integer
.valueOf(intObject
.toString());
848 //** ************* MAIN ********************************************/
852 public static void main(String
[] args
){
853 PesiErmsValidator validator
= new PesiErmsValidator();
854 validator
.invoke(defaultSource
, defaultDestination
);