ref #8508 add TODO to validation
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / erms / validation / PesiErmsValidator.java
1 /**
2 * Copyright (C) 2019 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.pesi.erms.validation;
10
11 import java.sql.ResultSet;
12 import java.sql.SQLException;
13 import java.sql.Timestamp;
14
15 import org.apache.commons.lang.StringUtils;
16 import org.apache.log4j.Logger;
17
18 import eu.etaxonomy.cdm.app.pesi.PesiDestinations;
19 import eu.etaxonomy.cdm.app.pesi.PesiSources;
20 import eu.etaxonomy.cdm.common.CdmUtils;
21 import eu.etaxonomy.cdm.io.common.Source;
22 import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
23
24 /**
25 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
26 *
27 * @author a.mueller
28 * @since 01.09.2019
29 */
30 public class PesiErmsValidator {
31
32 private static final Logger logger = Logger.getLogger(PesiErmsValidator.class);
33
34 private static final Source defaultSource = PesiSources.PESI2019_ERMS();
35 private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
36
37 private Source source = defaultSource;
38 private Source destination = defaultDestination;
39 private String moneraFilter = " NOT IN (-1)"; // 147415;
40 // private String moneraFilter = " NOT IN (147415)"; // 147415;
41
42 private String origErms = "OriginalDB = 'ERMS' ";
43
44 public void invoke(Source source, Source destination){
45 logger.warn("Validate destination " + destination.getDatabase());
46 boolean success = true;
47 try {
48 this.source = source;
49 this.destination = destination;
50 // success &= testReferences(); //ready, few minor issues to be discussed with VLIZ
51 success &= testTaxa();
52 // success &= testTaxonRelations();
53 // success &= testCommonNames(); //source(s) discuss VLIZ, exact duplicates (except for sources), Anus(Korur)
54 // success &= testDistributions(); //>1000 duplicates in "dr", sources (OccurrenceSource table), area spellings(Baelt Sea), 1 long note
55 // success &= testNotes(); //ecology & link notes test (only count tested), sources untested (NoteSource table), few duplicates,
56 success &= testAdditionalTaxonSources(); //ready
57 } catch (Exception e) {
58 e.printStackTrace();
59 success = false;
60 }
61 //TBC
62 System.out.println("end validation " + (success? "":"NOT ") + "successful.");
63 }
64
65 private boolean testAdditionalTaxonSources() throws SQLException {
66 System.out.println("Start validate additional taxon sources");
67 boolean success = testAdditionalTaxonSourcesCount();
68 if (success){
69 success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
70 }
71 return success;
72 }
73
74 private boolean testNotes() throws SQLException {
75 System.out.println("Start validate notes");
76 boolean success = testNotesCount();
77 if (success){
78 success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
79 }
80 return success;
81 }
82
83 private boolean testDistributions() throws SQLException {
84 System.out.println("Start validate distributions");
85 boolean success = testDistributionCount();
86 if (success){
87 success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
88 }
89 return success;
90 }
91
92 private boolean testCommonNames() throws SQLException {
93 System.out.println("Start validate common names");
94 boolean success = testCommonNameCount();
95 if (success){
96 success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
97 }
98 return success;
99 }
100
101 private boolean testTaxonRelations() throws SQLException {
102 System.out.println("Start validate taxon relations");
103 boolean success = testTaxonRelationCount();
104 if (success){
105 //TODO
106 success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
107 }
108 return success;
109 }
110
111 private boolean testTaxa() throws SQLException {
112 System.out.println("Start validate taxa");
113 boolean success = testTaxaCount();
114 if (success){
115 success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
116 }
117 return success;
118 }
119
120 private boolean testReferences() throws SQLException {
121 System.out.println("Start validate references");
122 boolean success = testReferenceCount();
123 if (success){
124 success &= testSingleReferences();
125 }
126 return success;
127 }
128
129 private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id " + moneraFilter;
130 private boolean testAdditionalTaxonSourcesCount() {
131 int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
132 int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
133 return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
134 }
135
136 private boolean testNotesCount() {
137 int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
138 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
139 + " WHERE NOT (NoteCategoryFk = 4 AND LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) ");
140 boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
141
142 countSrc = source.getUniqueInteger("SELECT count(*) FROM tu "
143 + " WHERE (tu_marine IS NOT NULL OR tu_brackish IS NOT NULL OR tu_fresh IS NOT NULL OR tu_terrestrial IS NOT NULL) "
144 + " AND tu.id " + moneraFilter );
145 countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
146 + " WHERE (NoteCategoryFk = 4 AND LastAction IS NULL) ");
147 result &= equals("Notes ecology count ", countSrc, countDest, String.valueOf(-1));
148
149 countSrc = source.getUniqueInteger("SELECT count(*) FROM links ");
150 countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
151 + " WHERE NoteCategoryFk IN (22,23,24) ");
152 result &= equals("Notes link count ", countSrc, countDest, String.valueOf(-1));
153
154 return result;
155 }
156
157 private boolean testDistributionCount() {
158 int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr ");
159 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
160 return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
161 }
162
163 private boolean testCommonNameCount() {
164 int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
165 int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
166 return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
167 }
168
169 private final String countSynonymRelation = "SELECT count(*) FROM tu WHERE tu_acctaxon <> id AND id " + moneraFilter;
170 private final String countParentRelation = "SELECT count(*) FROM tu WHERE tu_parent is not null and tu_status = 1 and id <> tu_parent AND id " + moneraFilter;
171 private boolean testTaxonRelationCount() {
172 //synonyms
173 int countSrc = source.getUniqueInteger(countSynonymRelation);
174 //TODO
175 int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
176 boolean result = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
177 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently placed%'
178 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently held%'
179 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'sy%' or tu_unacceptreason like '%jun%syn%'
180 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = '(synonym)'
181 // update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = 'reverted genus transfer'
182 // update Match_RelStat set RelTaxon = 103 where tu_unacceptreason like 'misapplied%'
183 // update Match_RelStat set RelTaxon = 104 where tu_unacceptreason like 'part% synonym%'
184 // update Match_RelStat set RelTaxon = 106 where tu_unacceptreason = 'heterotypic synonym' or tu_unacceptreason = 'subjective synonym'
185 // update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%homot%syn%' or tu_unacceptreason = 'objective synonym' synyonym
186 // update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%bas[iy][no]%ny%'
187
188 //Name relations
189 countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
190 + " tu_unacceptreason like '%bas[iy][no]%ny%' OR tu_unacceptreason = 'original combination' "
191 + " OR tu_unacceptreason = 'Subsequent combination' OR tu_unacceptreason like '%genus transfer%' "
192 + " OR tu_unacceptreason = 'genus change' " //1
193 + " OR tu_unacceptreason like '%homon%' " // 2
194 + " OR tu_unacceptreason like '%spell%' OR tu_unacceptreason like 'lapsus %' " //16
195
196 + ")");
197 countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
198 result = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
199
200 //included in
201 countSrc = source.getUniqueInteger(countParentRelation);
202 //TODO
203 countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
204 result &= equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
205 return result;
206 }
207
208
209 private final String countTaxon = "SELECT count(*) FROM tu WHERE id " + moneraFilter;
210 private boolean testTaxaCount() {
211 int countSrc = source.getUniqueInteger(countTaxon);
212 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
213 boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
214
215 //NomStatus
216 countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
217 + " tu_unacceptreason like '%inval%' OR tu_unacceptreason like '%not val%' "
218 + " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
219 + " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
220 + " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
221 + " OR tu_unacceptreason = 'new combination' "
222 + " OR tu_status IN (3,5,6,7,8) )");
223 countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
224 result = equals("Taxon name status count ", countSrc, countDest, String.valueOf(-1));
225
226 return result;
227 }
228
229 private boolean testSingleTaxa(int n) throws SQLException {
230 boolean success = true;
231 ResultSet srcRS = source.getResultSet(""
232 + " SELECT t.*, tu1.tu_name tu1_name, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
233 + " type.tu_displayname typename, type.tu_authority typeauthor, "
234 + " fo.fossil_name, qs.qualitystatus_name "
235 + " FROM tu t "
236 + " LEFT JOIN tu as tu1 on t.tu_parent = tu1.id"
237 + " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 40 AND rank_name = 'Subdivision' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
238 + " LEFT JOIN tu acc ON acc.id = t.tu_acctaxon "
239 + " LEFT JOIN status st ON st.status_id = t.tu_status "
240 + " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
241 + " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
242 + " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
243 + " WHERE t.id " + moneraFilter
244 + " ORDER BY CAST(t.id as nvarchar(20)) ");
245 ResultSet destRS = destination.getResultSet("SELECT t.*, "
246 + " pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
247 + " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
248 + " FROM Taxon t "
249 + " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
250 + " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
251 + " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
252 + " LEFT JOIN Source s ON s.SourceId = t.SourceFk "
253 + " WHERE t."+ origErms
254 + " ORDER BY t.IdInSource");
255 ResultSet srcRsLastAction = source.getResultSet(""
256 + " SELECT t.id, s.sessiondate, a.action_name, s.ExpertName "
257 + " FROM tu t "
258 + " LEFT OUTER JOIN tu_sessions MN ON t.id = MN.tu_id "
259 + " LEFT JOIN actions a ON a.id = MN.action_id "
260 + " LEFT JOIN sessions s ON s.id = MN.session_id "
261 + " ORDER BY CAST(t.id as nvarchar(20)), s.sessiondate DESC, a.id DESC ");
262 int i = 0;
263 while (srcRS.next() && destRS.next()){
264 success &= testSingleTaxon(srcRS, destRS);
265 //TODO success &= testLastAction(srcRsLastAction, destRS, String.valueOf(srcRS.getInt("id")), "Taxon");
266 i++;
267 }
268 success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
269 return success;
270 }
271
272
273 private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
274 String id = String.valueOf(srcRS.getInt("id"));
275 boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id);
276 success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
277 //TODO some success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
278 success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id);
279 success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name"), srcRS, id), destRS.getString("Rank"), id);
280 success &= compareNameParts(srcRS, destRS, id);
281
282 success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
283 // success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
284 success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
285 // success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
286 success &= isNull("NomRefString", destRS);
287 // success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached
288
289 //TODO success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
290 //TODO success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
291
292 //TODO success &= equals("Taxon TaxonStatusFk", nullSafeInt(srcRS, "tu_status"),nullSafeInt( destRS,"TaxonStatusFk"), id);
293 //TODO success &= equals("Taxon TaxonStatusCache", srcRS.getString("status_name"), destRS.getString("TaxonStatusCache"), id);
294
295 //TODO ParentTaxonFk
296 Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
297 success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
298 //TODO success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
299 success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
300 success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id);
301 //TODO TreeIndex
302 success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id));
303 success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id);
304 success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
305 success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
306 success &= isNull("ExpertGUID", destRS); //only relevant after merge
307 success &= isNull("ExpertName", destRS); //only relevant after merge
308 success &= isNull("SpeciesExpertGUID", destRS); //only relevant after merge
309 success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id);
310 //LastAction(Date) handled in separate method
311 success &= isNull("GUID2", destRS); //only relevant after merge
312 success &= isNull("DerivedFromGuid2", destRS); //only relevant after merge
313 return success;
314 }
315
316 boolean namePartsFirst = true;
317 private boolean compareNameParts(ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
318 if (namePartsFirst){
319 logger.warn("Validation of name parts not fully implemented (difficult). Currently validated via fullname");
320 namePartsFirst = false;
321 }
322 int rankFk = srcRS.getInt("tu_rank");
323 String genusOrUninomial = null;
324 String infraGenericEpithet = null;
325 String specificEpithet = null;
326 String infraSpecificEpithet = null;
327 if (rankFk <= 180){
328 genusOrUninomial = srcRS.getString("tu_name");
329 }else if (rankFk == 190){
330 genusOrUninomial = srcRS.getString("tu1_name");
331 infraGenericEpithet = srcRS.getString("tu_name");
332 //TODO does not work this way
333 // }else if (rankFk == 220){
334 // genusOrUninomial = destRS.getString("p_GenusOrUninomial");
335 // infraGenericEpithet = destRS.getString("p_InfraGenericEpithet");
336 // specificEpithet = srcRS.getString("tu_name");
337 }else{
338 //TODO exception
339 return false;
340 }
341 boolean result = testEpis(destRS, genusOrUninomial, infraGenericEpithet,
342 specificEpithet, infraSpecificEpithet, id);
343 return result;
344 }
345
346 private boolean testEpis(ResultSet destRS, String genusOrUninomial, String infraGenericEpithet, String specificEpithet,
347 String infraSpecificEpithet, String id) throws SQLException {
348 boolean result = equals("Taxon genusOrUninomial", genusOrUninomial, destRS.getString("GenusOrUninomial"), id) ;
349 result &= equals("Taxon infraGenericEpithet", infraGenericEpithet, destRS.getString("InfraGenericEpithet"), id) ;
350 result &= equals("Taxon specificEpithet", specificEpithet, destRS.getString("SpecificEpithet"), id) ;
351 result &= equals("Taxon infraSpecificEpithet", infraSpecificEpithet, destRS.getString("InfraSpecificEpithet"), id) ;
352 return result;
353 }
354
355 private String normalizeRank(String string, ResultSet srcRS, String id) throws SQLException {
356 String result = string
357 .replace("Subforma", "Subform")
358 .replace("Forma", "Form");
359 int kingdomFk = Integer.valueOf(getSourceKingdomFk(srcRS, id));
360 if (kingdomFk == 3 || kingdomFk == 4){
361 result = result.replace("Subphylum", "Subdivision");
362 result = result.replace("Phylum", "Division");
363 }
364 return result;
365 }
366
367 //see also ErmsTaxonImport.getExpectedTitleCache()
368 private String srcFullName(ResultSet srcRs) throws SQLException {
369 String result = null;
370 String epi = srcRs.getString("tu_name");
371 epi = " a" + epi;
372 String display = srcRs.getString("tu_displayname");
373 String sp = srcRs.getString("tu_sp");
374 if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
375 result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
376 }else{
377 result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));
378 }
379 return result;
380 }
381
382 private String srcDisplayName(ResultSet srcRs) throws SQLException {
383 String result = null;
384 String epi = srcRs.getString("tu_name");
385 epi = " a" + epi;
386 String display = "<i>"+srcRs.getString("tu_displayname")+"</i>";
387 display = display.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>");
388 String sp = srcRs.getString("tu_sp");
389 if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
390 result = display.replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
391 }else{
392 result = CdmUtils.concat(" ", display, srcRs.getString("tu_authority"));
393 }
394 return result;
395 }
396
397 String lastLastActionId = "-1";
398 private boolean testLastAction(ResultSet srcRsLastAction, ResultSet destRs, String id, String table) throws SQLException {
399 try {
400 boolean success = true;
401 String srcId = null;
402 while (srcRsLastAction.next()){
403 srcId = String.valueOf(srcRsLastAction.getInt("id"));
404 if (!lastLastActionId.equals(srcId)){
405 lastLastActionId = srcId;
406 break;
407 }
408 }
409 if(!id.equals(srcId)){
410 logger.warn("Last Action SourceIDs are not equal: id: " +id + ", la-id: " + srcId);
411 }
412 String destStr = destRs.getString("LastAction");
413 success &= equals(table + " SpeciesExpertName", srcRsLastAction.getString("ExpertName"), destRs.getString("SpeciesExpertName"), id); //mapping ExpertName => SpeciesExpertName according to SQL script
414 success &= equals(table + " Last Action", srcRsLastAction.getString("action_name"), destStr == null? null : destStr, id);
415 success &= equals(table + " Last Action Date", srcRsLastAction.getTimestamp("sessiondate"), destRs.getTimestamp("LastActionDate"), id);
416
417 return success;
418 } catch (Exception e) {
419 e.printStackTrace();
420 throw e;
421 }
422 }
423
424 private boolean compareKingdom(String messageStart, ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
425 String srcKingdom = getSourceKingdomFk(srcRS, id);
426 Integer intDest = nullSafeInt(destRS, "KingdomFk");
427 if (intDest == null){
428 logger.warn(id +": " + messageStart + " must never be null for destination. Biota needs to be 0, all the rest needs to have >0 int value.");
429 return false;
430 }else{
431 return equals(messageStart, srcKingdom, String.valueOf(intDest), id);
432 }
433 }
434
435 private String getSourceKingdomFk(ResultSet srcRS, String id) throws SQLException {
436 String strSrc = srcRS.getString("acc_sp");
437 if (strSrc == null){
438 strSrc = srcRS.getString("tu_sp");
439 }
440 if (strSrc == null){
441 strSrc = "1".equals(id)?"0":id;
442 }else{
443 strSrc = strSrc.substring(1);
444 strSrc = strSrc.substring(0, strSrc.indexOf("#"));
445 }
446 return strSrc;
447 }
448
449 private boolean testSingleTaxonRelations(int n) throws SQLException {
450 boolean success = true;
451 ResultSet srcRS = source.getResultSet(""
452 + " SELECT t.* "
453 + " FROM tu t "
454 + " WHERE t.id "+ moneraFilter + " AND tu_acctaxon <> id "
455 + " ORDER BY CAST(t.id as nvarchar(20)) ");
456 ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
457 + " FROM RelTaxon rel "
458 + " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
459 + " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
460 + " WHERE t1."+origErms+" AND t2." + origErms
461 + " ORDER BY t1.IdInSource");
462 int i = 0;
463 while (srcRS.next() && destRS.next()){
464 success &= testSingleTaxonRelation(srcRS, destRS);
465 i++;
466 }
467 success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
468 return success;
469 }
470
471 private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
472 String id = String.valueOf(srcRS.getInt("id"));
473 boolean success = equals("Taxon relation taxon1", "tu_id: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
474 success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_acctaxon"), destRS.getString("t2Id"), id);
475 success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
476 success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
477 //TODO enable after next import
478 // success &= isNull("notes", destRS);
479 //complete if no further relations need to added
480 return success;
481 }
482
483 private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
484 boolean success = true;
485 ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
486 + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
487 + " LEFT JOIN sources s ON s.id = MN.source_id "
488 + " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
489 + " WHERE MN.tu_id " + moneraFilter
490 + " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext
491 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
492 + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
493 + " INNER JOIN Source s ON s.SourceId = ats.SourceFk "
494 + " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
495 + " WHERE t."+origErms
496 + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
497 int count = 0;
498 while (srcRs.next() && destRs.next()){
499 success &= testSingleAdditionalTaxonSource(srcRs, destRs);
500 count++;
501 }
502 success &= equals("Notes count differs", n, count, "-1");
503 return success;
504 }
505
506 private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
507 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
508 boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
509 success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id); //currently we use the same id in ERMS and PESI
510 success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
511 success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
512 //TODO some records are still truncated ~ >820 characters
513 success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
514 success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
515 //Complete
516 return success;
517 }
518
519 private boolean testSingleNotes(int n) throws SQLException {
520 boolean success = true;
521 ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
522 + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
523 + " LEFT JOIN languages l ON l.LanID = no.lan_id "
524 + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable "); //, no.note (not possible because ntext
525 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
526 + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
527 + " LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
528 + " LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
529 + " WHERE t." + origErms
530 + " AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
531 + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1 ");
532 int count = 0;
533 ResultSet srcRsLastAction = source.getResultSet(""
534 + " SELECT no.id, s.sessiondate, a.action_name, s.ExpertName "
535 + " FROM notes no "
536 + " INNER JOIN tu ON tu.id = no.tu_id "
537 + " LEFT JOIN languages l ON l.LanID = no.lan_id"
538 + " LEFT JOIN notes_sessions MN ON no.id = MN.note_id "
539 + " LEFT JOIN actions a ON a.id = MN.action_id "
540 + " LEFT JOIN sessions s ON s.id = MN.session_id "
541 + " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
542
543 while (srcRs.next() && destRs.next()){
544 success &= testSingleNote(srcRs, destRs);
545 success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Note");
546 count++;
547 }
548 success &= equals("Notes count differs", n, count, "-1");
549 return success;
550 }
551
552 private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
553 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
554 boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
555 success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
556 success &= isNull("Note_2", destRs);
557 success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
558 success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
559 success &= isNull("Region", destRs);
560 success &= isNull("SpeciesExpertGUID", destRs);
561 //SpeciesExpertName, LastAction, LastActionDate handled in separate method
562 //complete
563 return success;
564 }
565
566 private String normalizeNoteCatCache(String string) {
567 return StringUtils.capitalize(string)
568 .replace("Original Combination", "Original combination")
569 .replace("Taxonomic remark", "Taxonomic Remark");
570 }
571
572 private boolean testSingleDistributions(int n) throws SQLException {
573 boolean success = true;
574 ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId,"
575 + " gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
576 + " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
577 + " LEFT JOIN gu ON gu.id = dr.gu_id "
578 + " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name, dr.noteSortable "); //, dr.note (not possible because ntext
579 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
580 + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
581 + " LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
582 + " WHERE t." + origErms
583 + " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
584 ResultSet srcRsLastAction = source.getResultSet(""
585 + " SELECT dr.id, s.sessiondate, a.action_name, s.ExpertName "
586 + " FROM dr "
587 + " INNER JOIN tu ON tu.id = dr.tu_id "
588 + " LEFT JOIN gu ON gu.id = dr.gu_id "
589 + " LEFT JOIN dr_sessions MN ON dr.id = MN.dr_id "
590 + " LEFT JOIN actions a ON a.id = MN.action_id "
591 + " LEFT JOIN sessions s ON s.id = MN.session_id "
592 + " ORDER BY CAST(tu.id as nvarchar(20)), gu.gazetteer_id, gu.gu_name, s.sessiondate DESC, a.id DESC ");
593 int count = 0;
594 while (srcRs.next() && destRs.next()){
595 success &= testSingleDistribution(srcRs, destRs);
596 //there are >1000 duplicates in dr, therefore this creates lots of warnings
597 success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Distribution");
598 count++;
599 }
600 success &= equals("Distribution count differs", n, count, "-1");
601 return success;
602 }
603
604 private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
605 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name"));
606 boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
607 success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id);
608 success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id);
609 success &= equals("Distribution area name cache", srcRs.getString("gu_name"), destRs.getString("AreaNameCache"), id);
610 success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id);
611 success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
612 //TODO see comments
613 success &= isNull("SourceFk", destRs); //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
614 success &= isNull("SourceCache", destRs); //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
615 success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
616 success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in ERMS
617 //SpeciesExpertName,LastAction,LastActionDate handled in separate method
618 return success;
619 }
620
621 private boolean testSingleCommonNames(int n) throws SQLException {
622 boolean success = true;
623 ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
624 + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
625 + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
626 ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
627 + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
628 + " WHERE t." + origErms
629 + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate "); //sorting also lastActionDate results in a minimum of exact duplicate problems
630 ResultSet srcRsLastAction = source.getResultSet(""
631 + " SELECT v.id, s.sessiondate, a.action_name, s.ExpertName "
632 + " FROM vernaculars v "
633 + " INNER JOIN tu ON tu.id = v.tu_id "
634 + " LEFT JOIN languages l ON l.LanID = v.lan_id"
635 + " LEFT JOIN vernaculars_sessions MN ON v.id = MN.vernacular_id "
636 + " LEFT JOIN actions a ON a.id = MN.action_id "
637 + " LEFT JOIN sessions s ON s.id = MN.session_id "
638 + " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
639 int count = 0;
640 while (srcRs.next() && destRs.next()){
641 success &= testSingleCommonName(srcRs, destRs);
642 success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "CommonName");
643 count++;
644 }
645 success &= equals("Common name count differs", n, count, "-1");
646 return success;
647 }
648
649 boolean prefer639_3 = true;
650 String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
651 String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
652
653 private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
654 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
655 boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
656 success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
657 success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
658 success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
659 //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
660 success &= isNull("Region", destRs); //region does not seem to exist in ERMS
661 //TODO see comments
662 // success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014
663 // success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014
664 success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in ERMS
665 //SpeciesExpertName,LastAction,LastActionDate handled in separate method
666 //complete
667 return success;
668 }
669
670 private String normalizeLang(String string) {
671 if ("Spanish".equals(string)){
672 return "Spanish, Castillian";
673 }else if ("Modern Greek (1453-)".equals(string)){
674 return "Greek";
675 }else if ("Malay (individual language)".equals(string)){
676 return "Malay";
677 }else if ("Swahili (individual language)".equals(string)){
678 return "Swahili";
679 }
680
681 return string;
682 }
683
684 private String getLanguageIso(ResultSet destRs) throws SQLException {
685 String result = destRs.getString(preferredISO639);
686 if (result == null){
687 result = destRs.getString(alternativeISO639);
688 }
689 return result;
690 }
691
692 private boolean testSingleReferences() throws SQLException {
693 boolean success = true;
694 ResultSet srcRS = source.getResultSet("SELECT s.* FROM sources s ORDER BY s.id ");
695 ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
696 + " WHERE s." + origErms
697 + " ORDER BY s.RefIdInSource "); // +1 for the source reference "erms" but this has no OriginalDB
698 while (srcRS.next() && destRS.next()){
699 success &= testSingleReference(srcRS, destRS);
700 }
701 return success;
702 }
703
704 private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
705 String id = String.valueOf(srcRS.getInt("id"));
706 boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
707 success &= equals("Reference IMIS_id ", srcRS.getString("imis_id"), destRS.getString("IMIS_Id"), id);
708 success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("source_type")), destRS.getInt("SourceCategoryFk"), id);
709 success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("source_type")), destRS.getString("SourceCategoryCache"), id);
710 success &= equals("Reference name ", srcRS.getString("source_name"), destRS.getString("Name"), id);
711 success &= equals("Reference abstract ", srcRS.getString("source_abstract"), destRS.getString("Abstract"), id);
712 success &= equals("Reference title ", srcRS.getString("source_title"), destRS.getString("Title"), id);
713 success &= equals("Reference author string ", srcRS.getString("source_author"), destRS.getString("AuthorString"), id);
714 success &= equals("Reference year ", normalizeYear(srcRS.getString("source_year")), destRS.getString("RefYear"), id);
715 success &= isNull("NomRefCache", destRS); //for ERMS no other value was found in 2014 value
716 success &= equals("Reference link ", srcRS.getString("source_link"), destRS.getString("Link"), id);
717 success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
718 //complete
719 return success;
720 }
721
722 private Integer convertSourceTypeFk(String sourceType) {
723 if (sourceType == null){
724 return null;
725 }else if ("d".equals(sourceType)){
726 return 4;
727 }else if ("e".equals(sourceType)){
728 return 5;
729 }else if ("p".equals(sourceType)){
730 return 11;
731 }else if ("i".equals(sourceType)){
732 return 12;
733 }
734 return null;
735 }
736 private String convertSourceTypeCache(String sourceType) {
737 if (sourceType == null){
738 return null;
739 }else if ("d".equals(sourceType)){
740 return "database";
741 }else if ("e".equals(sourceType)){
742 return "informal reference";
743 }else if ("p".equals(sourceType)){
744 return "publication";
745 }else if ("i".equals(sourceType)){
746 //TODO
747 return "i";
748 }
749 return null;
750 }
751
752 private boolean testReferenceCount() {
753 int countSrc = source.getUniqueInteger("SELECT count(*) FROM sources ");
754 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origErms); // +1 for the source reference "erms" but this has no OriginalDB
755 boolean success = equals("Reference count ", countSrc, countDest, "-1");
756 return success;
757 }
758
759 private String normalizeYear(String yearStr) {
760 if (StringUtils.isBlank(yearStr)){
761 return yearStr;
762 }
763 yearStr = yearStr.trim();
764 if (yearStr.matches("\\d{4}-\\d{2}")){
765 yearStr = yearStr.substring(0, 5)+yearStr.substring(0, 2)+yearStr.substring(5);
766 }
767 return yearStr;
768 }
769
770 private boolean isNull(String attrName, ResultSet destRS) throws SQLException {
771 Object value = destRS.getObject(attrName);
772 if (value != null){
773 String message = attrName + " was expected to be null but was: " + value.toString();
774 logger.warn(message);
775 return false;
776 }else{
777 logger.info(attrName + " was null as expected");
778 return true;
779 }
780 }
781
782 private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
783 if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
784 String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+ srcDate + "; Destination: " + destDate;
785 logger.warn(message);
786 return false;
787 }else{
788 logger.info(messageStart + " were equal: " + srcDate);
789 return true;
790 }
791 }
792
793 private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
794 String strId = id.equals("-1")? "": (id+ ": ");
795 if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
796 String message = strId+ messageStart + " must be equal, but was not.\n Source: "+ nSrc + "; Destination: " + nDest;
797 logger.warn(message);
798 return false;
799 }else{
800 logger.info(strId + messageStart + " were equal: " + nSrc);
801 return true;
802 }
803 }
804
805 private boolean equals(String messageStart, String strSrc, String strDest, String id) {
806 if (StringUtils.isBlank(strSrc)){
807 strSrc = null;
808 }else{
809 strSrc = strSrc.trim();
810 }
811 //we do not trim strDest here because this should be done during import already. If not it should be shown here
812 if (!CdmUtils.nullSafeEqual(strSrc, strDest)){
813 int index = diffIndex(strSrc, strDest);
814 String message = id+ ": " + messageStart + " must be equal, but was not at "+index+".\n Source: "+ strSrc + "\n Destination: " + strDest;
815 logger.warn(message);
816 return false;
817 }else{
818 logger.info(id+ ": " + messageStart + " were equal: " + strSrc);
819 return true;
820 }
821 }
822
823 private int diffIndex(String strSrc, String strDest) {
824 if (strSrc == null || strDest == null){
825 return 0;
826 }
827 int i;
828 for (i = 0; i<strSrc.length() && i<strDest.length() ;i++) {
829 if (strSrc.charAt(i)!= strDest.charAt(i)){
830 return i;
831 }
832 }
833 if(strSrc.length()!=strDest.length()){
834 return Math.max(strSrc.length(), strDest.length());
835 }
836 return i;
837 }
838
839 protected Integer nullSafeInt(ResultSet rs, String columnName) throws SQLException {
840 Object intObject = rs.getObject(columnName);
841 if (intObject == null){
842 return null;
843 }else{
844 return Integer.valueOf(intObject.toString());
845 }
846 }
847
848 //** ************* MAIN ********************************************/
849
850
851
852 public static void main(String[] args){
853 PesiErmsValidator validator = new PesiErmsValidator();
854 validator.invoke(defaultSource, defaultDestination);
855 System.exit(0);
856 }
857 }