Revision a0f9168d
Added by Andreas Müller over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/erms/validation/PesiErmsValidator.java | ||
---|---|---|
37 | 37 |
private Source destination = defaultDestination; |
38 | 38 |
|
39 | 39 |
public void invoke(Source source, Source destination){ |
40 |
logger.warn("Validate destination " + destination.getDatabase()); |
|
40 | 41 |
boolean success = true; |
41 | 42 |
try { |
42 | 43 |
this.source = source; |
43 | 44 |
this.destination = destination; |
44 |
success &= testReferences(); |
|
45 |
success &= testTaxa(); |
|
46 |
success &= testTaxonRelations(); |
|
47 |
success &= testCommonNames(); |
|
45 |
// success &= testReferences(); |
|
46 |
// success &= testTaxa(); |
|
47 |
// success &= testTaxonRelations(); |
|
48 |
// success &= testCommonNames(); |
|
49 |
success &= testDistributions(); |
|
48 | 50 |
} catch (Exception e) { |
49 | 51 |
e.printStackTrace(); |
50 | 52 |
success = false; |
... | ... | |
53 | 55 |
System.out.println("end validation " + (success? "":"NOT ") + "successful."); |
54 | 56 |
} |
55 | 57 |
|
58 |
private boolean testDistributions() throws SQLException { |
|
59 |
boolean success = testDistributionCount(); |
|
60 |
if (success){ |
|
61 |
success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr ")); |
|
62 |
} |
|
63 |
return success; |
|
64 |
} |
|
65 |
|
|
56 | 66 |
private boolean testCommonNames() throws SQLException { |
57 | 67 |
boolean success = testCommonNameCount(); |
58 | 68 |
if (success){ |
... | ... | |
82 | 92 |
return success; |
83 | 93 |
} |
84 | 94 |
|
95 |
private boolean testDistributionCount() { |
|
96 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr "); |
|
97 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence "); |
|
98 |
return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1)); |
|
99 |
} |
|
100 |
|
|
85 | 101 |
private boolean testCommonNameCount() { |
86 | 102 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars "); |
87 | 103 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName "); |
... | ... | |
89 | 105 |
} |
90 | 106 |
|
91 | 107 |
private boolean testTaxaCount() { |
92 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu "); |
|
108 |
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id NOT IN (147415)");
|
|
93 | 109 |
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon "); |
94 | 110 |
return equals("Taxon count ", countSrc, countDest, String.valueOf(-1)); |
95 | 111 |
} |
... | ... | |
106 | 122 |
+ " LEFT OUTER JOIN tu type ON type.id = t.tu_typetaxon " |
107 | 123 |
+ " LEFT OUTER JOIN fossil fo ON t.tu_fossil = fo.fossil_id " |
108 | 124 |
+ " LEFT OUTER JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id " |
125 |
+ " WHERE t.id NOT IN (147415) " |
|
109 | 126 |
+ " ORDER BY CAST(t.id as nvarchar(20)) "); |
110 | 127 |
ResultSet destRS = destination.getResultSet("SELECT t.*, type.IdInSource typeSourceId " |
111 | 128 |
+ " FROM Taxon t " |
... | ... | |
141 | 158 |
// success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id); |
142 | 159 |
success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id); |
143 | 160 |
success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id); |
144 |
//TODO NomRefString |
|
145 |
//TODO DisplayName |
|
161 |
success &= isNull("NomRefString", destRS); |
|
162 |
success &= equals("Taxon DisplayName", srcFullName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached |
|
163 |
|
|
146 | 164 |
//TODO success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id); |
147 | 165 |
//TODO success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id); |
148 | 166 |
|
... | ... | |
220 | 238 |
} |
221 | 239 |
} |
222 | 240 |
|
241 |
private boolean testSingleDistributions(int n) throws SQLException { |
|
242 |
boolean success = true; |
|
243 |
ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId, gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name " |
|
244 |
+ " FROM dr INNER JOIN tu ON dr.tu_id = tu.id " |
|
245 |
+ " LEFT JOIN gu ON gu.id = dr.gu_id " |
|
246 |
+ " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name "); //, dr.note (not possible because ntext |
|
247 |
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName " |
|
248 |
+ " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk " |
|
249 |
+ " LEFT JOIN Area a ON a.AreaId = oc.AreaFk " |
|
250 |
+ " WHERE t.OriginalDB = 'erms' " |
|
251 |
+ " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes "); |
|
252 |
int count = 0; |
|
253 |
while (srcRs.next() && destRs.next()){ |
|
254 |
success &= testSingleDistribution(srcRs, destRs); |
|
255 |
count++; |
|
256 |
} |
|
257 |
success &= equals("Distribution count differs", n, count, "-1"); |
|
258 |
return success; |
|
259 |
} |
|
260 |
|
|
261 |
private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException { |
|
262 |
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name")); |
|
263 |
boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id); |
|
264 |
success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id); |
|
265 |
success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id); |
|
266 |
success &= equals("Distribution area name ", srcRs.getString("note"), destRs.getString("Notes"), id); |
|
267 |
|
|
268 |
//TODO |
|
269 |
return success; |
|
270 |
} |
|
271 |
|
|
223 | 272 |
private boolean testSingleCommonNames(int n) throws SQLException { |
224 | 273 |
boolean success = true; |
225 |
ResultSet srcRs = source.getResultSet("SELECT v.*, l.LanName, tu.id tuId " |
|
274 |
ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
|
|
226 | 275 |
+ " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id " |
227 |
+ " ORDER BY CAST(tu.id as nvarchar(20)), tu.id ");
|
|
228 |
ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2 " |
|
229 |
+ " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk INNER JOIN Language l ON l.LanguageId = cn.LanguageFk "
|
|
276 |
+ " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername ");
|
|
277 |
ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
|
|
278 |
+ " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
|
|
230 | 279 |
+ " WHERE t.OriginalDB = 'erms' " |
231 |
+ " ORDER BY t.IdInSource, l.ISO639_2");
|
|
280 |
+ " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName");
|
|
232 | 281 |
int count = 0; |
233 | 282 |
while (srcRs.next() && destRs.next()){ |
234 | 283 |
success &= testSingleCommonName(srcRs, destRs); |
... | ... | |
238 | 287 |
return success; |
239 | 288 |
} |
240 | 289 |
|
290 |
boolean prefer639_3 = true; |
|
291 |
String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2"; |
|
292 |
String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3"; |
|
293 |
|
|
241 | 294 |
private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException { |
242 | 295 |
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id")); |
243 | 296 |
boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id); |
244 |
success &= equals("Common name languageID ", srcRs.getString("lan_id"), destRs.getString("ISO639_2"), id);
|
|
297 |
success &= equals("Common name languageID ", srcRs.getString("iso"), getLanguageIso(destRs), id);
|
|
245 | 298 |
success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id); |
246 | 299 |
//TODO success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id); |
247 |
success = equals("CommonName LanguageCache ", srcRs.getString("LanName"), destRs.getString("LanguageCache"), id);
|
|
300 |
success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
|
|
248 | 301 |
success &= isNull("Region", destRs); //region does not seem to exist in ERMS |
249 | 302 |
|
250 | 303 |
//TODO |
251 | 304 |
return success; |
252 | 305 |
} |
253 | 306 |
|
307 |
private String normalizeLang(String string) { |
|
308 |
if ("Norwegian Nynorsk".equals(string)){ |
|
309 |
return "Nynorsk (Norwegian)"; |
|
310 |
}else if ("Norwegian Bokmål".equals(string)){ |
|
311 |
return "Bokmål (Norwegian)"; |
|
312 |
}else if ("Spanish".equals(string)){ |
|
313 |
return "Spanish, Castillian"; |
|
314 |
}else if ("Modern Greek (1453-)".equals(string)){ |
|
315 |
return "Greek"; |
|
316 |
}else if ("Hebrew".equals(string)){ |
|
317 |
return "Israel (Hebrew)"; |
|
318 |
}else if ("Malay (individual language)".equals(string)){ |
|
319 |
return "Malay"; |
|
320 |
} |
|
321 |
|
|
322 |
return string; |
|
323 |
} |
|
324 |
|
|
325 |
private String getLanguageIso(ResultSet destRs) throws SQLException { |
|
326 |
String result = destRs.getString(preferredISO639); |
|
327 |
if (result == null){ |
|
328 |
result = destRs.getString(alternativeISO639); |
|
329 |
} |
|
330 |
return result; |
|
331 |
} |
|
332 |
|
|
254 | 333 |
private boolean testSingleReferences() throws SQLException { |
255 | 334 |
boolean success = true; |
256 | 335 |
ResultSet srcRS = source.getResultSet("SELECT s.* FROM sources s ORDER BY s.id "); |
Also available in: Unified diff
ref #8508 latest changes to ERMS pipeline validation