Revision 164fc327
Added by Andreas Müller over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/validation/PesiErmsValidator.java | ||
---|---|---|
19 | 19 |
import eu.etaxonomy.cdm.app.pesi.PesiSources; |
20 | 20 |
import eu.etaxonomy.cdm.common.CdmUtils; |
21 | 21 |
import eu.etaxonomy.cdm.io.common.Source; |
22 |
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer; |
|
22 | 23 |
|
23 | 24 |
/** |
24 | 25 |
* Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB. |
... | ... | |
43 | 44 |
this.source = source; |
44 | 45 |
this.destination = destination; |
45 | 46 |
// success &= testReferences(); |
46 |
// success &= testTaxa();
|
|
47 |
// success &= testTaxonRelations();
|
|
47 |
success &= testTaxa(); |
|
48 |
success &= testTaxonRelations(); |
|
48 | 49 |
// success &= testCommonNames(); |
49 | 50 |
// success &= testDistributions(); |
50 |
success &= testNotes(); |
|
51 |
// success &= testNotes(); |
|
52 |
// success &= testAdditionalTaxonSources(); |
|
51 | 53 |
} catch (Exception e) { |
52 | 54 |
e.printStackTrace(); |
53 | 55 |
success = false; |
... | ... | |
56 | 58 |
System.out.println("end validation " + (success? "":"NOT ") + "successful."); |
57 | 59 |
} |
58 | 60 |
|
61 |
private boolean testAdditionalTaxonSources() throws SQLException { |
|
62 |
System.out.println("Start validate additional taxon sources"); |
|
63 |
boolean success = testAdditionalTaxonSourcesCount(); |
|
64 |
if (success){ |
|
65 |
success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource)); |
|
66 |
} |
|
67 |
return success; |
|
68 |
} |
|
69 |
|
|
59 | 70 |
private boolean testNotes() throws SQLException { |
71 |
System.out.println("Start validate notes"); |
|
60 | 72 |
boolean success = testNotesCount(); |
61 |
if (!success){
|
|
73 |
if (success){ |
|
62 | 74 |
success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes ")); |
63 | 75 |
} |
64 | 76 |
return success; |
65 | 77 |
} |
66 | 78 |
|
67 | 79 |
private boolean testDistributions() throws SQLException { |
80 |
System.out.println("Start validate distributions"); |
|
68 | 81 |
boolean success = testDistributionCount(); |
69 | 82 |
if (success){ |
70 | 83 |
success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr ")); |
... | ... | |
73 | 86 |
} |
74 | 87 |
|
75 | 88 |
private boolean testCommonNames() throws SQLException { |
89 |
System.out.println("Start validate common names"); |
|
76 | 90 |
boolean success = testCommonNameCount(); |
77 | 91 |
if (success){ |
78 | 92 |
success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars ")); |
... | ... | |
80 | 94 |
return success; |
81 | 95 |
} |
82 | 96 |
|
83 |
private boolean testTaxonRelations() { |
|
84 |
boolean success = true; |
|
97 |
private boolean testTaxonRelations() throws SQLException { |
|
98 |
System.out.println("Start validate taxon relations"); |
|
99 |
boolean success = testTaxonRelationCount(); |
|
100 |
if (success){ |
|
101 |
success &= testSingleTaxonRelations(source.getUniqueInteger(countTaxonRelation)); |
|
102 |
} |
|
85 | 103 |
return success; |
86 | 104 |
} |
87 | 105 |
|
88 | 106 |
private boolean testTaxa() throws SQLException { |
89 |
boolean success = testTaxaCount(); |
|
90 |
if (success){ |
|
91 |
success &= testSingleTaxa(source.getUniqueInteger("SELECT count(*) FROM tu ")); |
|
92 |
} |
|
93 |
return success; |
|
107 |
System.out.println("Start validate taxa"); |
|
108 |
boolean success = testTaxaCount(); |
|
109 |
if (success){ |
|
110 |
success &= testSingleTaxa(source.getUniqueInteger(countTaxon)); |
|
111 |
} |
|
112 |
return success; |
|
94 | 113 |
} |
95 | 114 |
|
96 | 115 |
private boolean testReferences() throws SQLException { |
116 |
System.out.println("Start validate references"); |
|
97 | 117 |
boolean success = testReferenceCount(); |
98 | 118 |
if (success){ |
99 | 119 |
success &= testSingleReferences(); |
... | ... | |
101 | 121 |
return success; |
102 | 122 |
} |
103 | 123 |
|
124 |
private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id <> 147415 "; |
|
125 |
private boolean testAdditionalTaxonSourcesCount() { |
|
126 |
int countSrc = source.getUniqueInteger(countAddtionalTaxonSource); |
|
127 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource "); |
|
128 |
return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1)); |
|
129 |
} |
|
130 |
|
|
104 | 131 |
private boolean testNotesCount() { |
105 | 132 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes "); |
106 | 133 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "); |
... | ... | |
119 | 146 |
return equals("CommonName count ", countSrc, countDest, String.valueOf(-1)); |
120 | 147 |
} |
121 | 148 |
|
149 |
private final String countTaxonRelation = "SELECT count(*) FROM tu WHERE tu_acctaxon <> id AND id NOT IN (147415) "; |
|
150 |
private boolean testTaxonRelationCount() { |
|
151 |
int countSrc = source.getUniqueInteger(countTaxonRelation); |
|
152 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon "); |
|
153 |
return equals("Taxon count ", countSrc, countDest, String.valueOf(-1)); |
|
154 |
} |
|
155 |
|
|
156 |
|
|
157 |
private final String countTaxon = "SELECT count(*) FROM tu WHERE id NOT IN (147415)"; |
|
122 | 158 |
private boolean testTaxaCount() { |
123 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id NOT IN (147415)");
|
|
159 |
int countSrc = source.getUniqueInteger(countTaxon);
|
|
124 | 160 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon "); |
125 | 161 |
return equals("Taxon count ", countSrc, countDest, String.valueOf(-1)); |
126 | 162 |
} |
... | ... | |
128 | 164 |
private boolean testSingleTaxa(int n) throws SQLException { |
129 | 165 |
boolean success = true; |
130 | 166 |
ResultSet srcRS = source.getResultSet("" |
131 |
+ " SELECT t.*, acc.tu_sp as acc_sp, st.status_name, " |
|
167 |
+ " SELECT t.*, r.rank_name, acc.tu_sp as acc_sp, st.status_name, "
|
|
132 | 168 |
+ " type.tu_displayname typename, type.tu_authority typeauthor, " |
133 | 169 |
+ " fo.fossil_name, qs.qualitystatus_name " |
134 | 170 |
+ " FROM tu t " |
135 |
+ " LEFT OUTER JOIN tu acc ON acc.id = t.tu_acctaxon " |
|
136 |
+ " LEFT OUTER JOIN status st ON st.status_id = t.tu_status " |
|
137 |
+ " LEFT OUTER JOIN tu type ON type.id = t.tu_typetaxon " |
|
138 |
+ " LEFT OUTER JOIN fossil fo ON t.tu_fossil = fo.fossil_id " |
|
139 |
+ " LEFT OUTER JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id " |
|
171 |
+ " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 40 AND rank_name = 'Subphylum' OR rank_id = 122 AND rank_name='Parvorder')) as r ON t.tu_rank = r.rank_id " |
|
172 |
+ " LEFT JOIN tu acc ON acc.id = t.tu_acctaxon " |
|
173 |
+ " LEFT JOIN status st ON st.status_id = t.tu_status " |
|
174 |
+ " LEFT JOIN tu type ON type.id = t.tu_typetaxon " |
|
175 |
+ " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id " |
|
176 |
+ " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id " |
|
140 | 177 |
+ " WHERE t.id NOT IN (147415) " |
141 | 178 |
+ " ORDER BY CAST(t.id as nvarchar(20)) "); |
142 |
ResultSet destRS = destination.getResultSet("SELECT t.*, type.IdInSource typeSourceId "
|
|
179 |
ResultSet destRS = destination.getResultSet("SELECT t.*, s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
|
|
143 | 180 |
+ " FROM Taxon t " |
144 |
+ " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk" |
|
181 |
+ " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk " |
|
182 |
+ " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk " |
|
183 |
+ " LEFT JOIN Source s ON s.SourceId = t.SourceFk " |
|
145 | 184 |
+ " WHERE t.OriginalDB = 'erms' " |
146 | 185 |
+ " ORDER BY t.IdInSource"); |
147 | 186 |
ResultSet srcRsLastAction = source.getResultSet("" |
... | ... | |
165 | 204 |
private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException { |
166 | 205 |
String id = String.valueOf(srcRS.getInt("id")); |
167 | 206 |
boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id); |
168 |
//TODO SourceFk
|
|
207 |
success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
|
|
169 | 208 |
// success &= compareKingdom("Taxon kingdom", srcRS, destRS, id); |
170 |
//TODO RankFk, RankCache |
|
209 |
success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id); |
|
210 |
// success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id); |
|
171 | 211 |
//TODO GenusOrUninomial, InfraGenericEpithet, SpecificEpithet, InfraSpecificEpithet |
172 | 212 |
// success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id); |
173 | 213 |
// success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id); |
174 | 214 |
success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id); |
175 |
success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id); |
|
215 |
// success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
|
|
176 | 216 |
success &= isNull("NomRefString", destRS); |
177 |
success &= equals("Taxon DisplayName", srcFullName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached
|
|
217 |
// success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached
|
|
178 | 218 |
|
179 | 219 |
//TODO success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id); |
180 | 220 |
//TODO success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id); |
... | ... | |
205 | 245 |
return success; |
206 | 246 |
} |
207 | 247 |
|
248 |
/** |
|
249 |
* @param string |
|
250 |
* @return |
|
251 |
*/ |
|
252 |
private String normalizeRank(String string) { |
|
253 |
String result = string.replace("Subforma", "Subform") |
|
254 |
.replace("Forma", "Form") |
|
255 |
// -- .replace("Subdivision", "Subphylum") |
|
256 |
; |
|
257 |
return result; |
|
258 |
} |
|
259 |
|
|
208 | 260 |
private String srcFullName(ResultSet srcRs) throws SQLException { |
209 | 261 |
String result = null; |
210 | 262 |
String epi = srcRs.getString("tu_name"); |
... | ... | |
219 | 271 |
return result; |
220 | 272 |
} |
221 | 273 |
|
274 |
private String srcDisplayName(ResultSet srcRs) throws SQLException { |
|
275 |
String result = null; |
|
276 |
String epi = srcRs.getString("tu_name"); |
|
277 |
epi = " a" + epi; |
|
278 |
String display = "<i>"+srcRs.getString("tu_displayname")+"</i>"; |
|
279 |
display = display.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>"); |
|
280 |
String sp = srcRs.getString("tu_sp"); |
|
281 |
if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal |
|
282 |
result = display.replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" "; |
|
283 |
}else{ |
|
284 |
result = CdmUtils.concat(" ", display, srcRs.getString("tu_authority")); |
|
285 |
} |
|
286 |
return result; |
|
287 |
} |
|
288 |
|
|
222 | 289 |
private boolean testTaxonLastAction(ResultSet srcRs, ResultSet destRs, String id) throws SQLException { |
223 | 290 |
boolean success = true; |
224 | 291 |
while (srcRs.next()){ |
... | ... | |
253 | 320 |
} |
254 | 321 |
} |
255 | 322 |
|
323 |
private boolean testSingleTaxonRelations(int n) throws SQLException { |
|
324 |
boolean success = true; |
|
325 |
ResultSet srcRS = source.getResultSet("" |
|
326 |
+ " SELECT t.* " |
|
327 |
+ " FROM tu t " |
|
328 |
+ " WHERE t.id NOT IN (147415) AND tu_acctaxon <> id " |
|
329 |
+ " ORDER BY CAST(t.id as nvarchar(20)) "); |
|
330 |
ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id " |
|
331 |
+ " FROM RelTaxon rel " |
|
332 |
+ " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 " |
|
333 |
+ " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 " |
|
334 |
+ " WHERE t1.OriginalDB = 'erms' AND t2.OriginalDB = 'erms' " |
|
335 |
+ " ORDER BY t1.IdInSource"); |
|
336 |
int i = 0; |
|
337 |
while (srcRS.next() && destRS.next()){ |
|
338 |
success &= testSingleTaxonRelation(srcRS, destRS); |
|
339 |
i++; |
|
340 |
} |
|
341 |
success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1)); |
|
342 |
return success; |
|
343 |
} |
|
344 |
|
|
345 |
private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException { |
|
346 |
String id = String.valueOf(srcRS.getInt("id")); |
|
347 |
boolean success = equals("Taxon relation taxon1", "tu_id: " + srcRS.getInt("id"), destRS.getString("t1Id"), id); |
|
348 |
success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_acctaxon"), destRS.getString("t2Id"), id); |
|
349 |
success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id); |
|
350 |
success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id); |
|
351 |
//TODO enable after next import |
|
352 |
// success &= isNull("notes", destRS); |
|
353 |
//complete if no further relations need to added |
|
354 |
return success; |
|
355 |
} |
|
356 |
|
|
357 |
|
|
358 |
private boolean testSingleAdditionalTaxonSources(int n) throws SQLException { |
|
359 |
boolean success = true; |
|
360 |
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name " |
|
361 |
+ " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id " |
|
362 |
+ " LEFT JOIN sources s ON s.id = MN.source_id " |
|
363 |
+ " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id " |
|
364 |
+ " WHERE MN.tu_id NOT IN (147415) " |
|
365 |
+ " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext |
|
366 |
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* " |
|
367 |
+ " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk " |
|
368 |
+ " INNER JOIN Source s ON s.SourceId = ats.SourceFk " |
|
369 |
+ " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk " |
|
370 |
+ " WHERE t.OriginalDB = 'erms' " |
|
371 |
+ " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource "); |
|
372 |
int count = 0; |
|
373 |
while (srcRs.next() && destRs.next()){ |
|
374 |
success &= testSingleAdditionalTaxonSource(srcRs, destRs); |
|
375 |
count++; |
|
376 |
} |
|
377 |
success &= equals("Notes count differs", n, count, "-1"); |
|
378 |
return success; |
|
379 |
} |
|
380 |
|
|
381 |
private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException { |
|
382 |
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name")); |
|
383 |
boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id); |
|
384 |
success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id); //currently we use the same id in ERMS and PESI |
|
385 |
success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id); |
|
386 |
success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id); |
|
387 |
//TODO some records are still truncated ~ >820 characters |
|
388 |
//success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id); |
|
389 |
success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id); |
|
390 |
//Complete |
|
391 |
return success; |
|
392 |
} |
|
256 | 393 |
|
257 | 394 |
private boolean testSingleNotes(int n) throws SQLException { |
258 | 395 |
boolean success = true; |
259 |
ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId, no.*, l.LanName "
|
|
396 |
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
|
|
260 | 397 |
+ " FROM notes no INNER JOIN tu ON no.tu_id = tu.id " |
261 | 398 |
+ " LEFT JOIN languages l ON l.LanID = no.lan_id " |
262 | 399 |
+ " ORDER BY CAST(tu.id as nvarchar(20)), no.type "); //, no.note (not possible because ntext |
Also available in: Unified diff
ref #8508 latest changes to PesiErmsValidator