1
|
/**
|
2
|
* Copyright (C) 2019 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.app.pesi.validate;
|
10
|
|
11
|
import java.sql.ResultSet;
|
12
|
import java.sql.SQLException;
|
13
|
import java.sql.Timestamp;
|
14
|
|
15
|
import org.apache.commons.lang.StringUtils;
|
16
|
import org.apache.log4j.Logger;
|
17
|
|
18
|
import eu.etaxonomy.cdm.app.common.PesiDestinations;
|
19
|
import eu.etaxonomy.cdm.app.common.PesiSources;
|
20
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
21
|
import eu.etaxonomy.cdm.io.common.Source;
|
22
|
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
|
23
|
|
24
|
/**
|
25
|
* Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
|
26
|
*
|
27
|
* @author a.mueller
|
28
|
* @since 01.09.2019
|
29
|
*/
|
30
|
public class PesiErmsValidator extends PesiValidatorBase {
|
31
|
|
32
|
private static final Logger logger = Logger.getLogger(PesiErmsValidator.class);
|
33
|
|
34
|
private static final Source defaultSource = PesiSources.PESI2019_ERMS_2019();
|
35
|
// private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI();
|
36
|
private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_ERMS2PESI_2();
|
37
|
|
38
|
private Source source = defaultSource;
|
39
|
private Source destination = defaultDestination;
|
40
|
private String moneraFilter = " NOT IN (-1)"; // 147415;
|
41
|
// private String moneraFilter = " NOT IN (147415)"; // 147415;
|
42
|
|
43
|
private String origErms = "OriginalDB = 'ERMS' ";
|
44
|
|
45
|
public void invoke(Source source, Source destination){
|
46
|
logger.warn("Validate destination " + destination.getDatabase());
|
47
|
boolean success = true;
|
48
|
try {
|
49
|
this.source = source;
|
50
|
this.destination = destination;
|
51
|
// success &= testReferences(); //ready, few minor issues to be discussed with VLIZ
|
52
|
success &= testTaxa();
|
53
|
// success &= testTaxonRelations(); //name relations count!, single record compare tests for synonyms and included in
|
54
|
// success &= testCommonNames(); //source(s) discuss VLIZ, exact duplicates (except for sources), Anus(Korur)
|
55
|
// success &= testDistributions(); //>1000 duplicates in "dr", sources (OccurrenceSource table), 1 long note
|
56
|
// success &= testNotes(); //ecology & link notes test (only count tested), sources untested (NoteSource table)
|
57
|
// success &= testAdditionalTaxonSources(); //ready
|
58
|
} catch (Exception e) {
|
59
|
e.printStackTrace();
|
60
|
success = false;
|
61
|
}
|
62
|
System.out.println("end validation " + (success? "":"NOT ") + "successful.");
|
63
|
}
|
64
|
|
65
|
private boolean testAdditionalTaxonSources() throws SQLException {
|
66
|
System.out.println("Start validate additional taxon sources");
|
67
|
boolean success = testAdditionalTaxonSourcesCount();
|
68
|
if (success){
|
69
|
success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
|
70
|
}
|
71
|
return success;
|
72
|
}
|
73
|
|
74
|
private boolean testNotes() throws SQLException {
|
75
|
System.out.println("Start validate notes");
|
76
|
boolean success = testNotesCount();
|
77
|
if (success){
|
78
|
success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
|
79
|
}
|
80
|
return success;
|
81
|
}
|
82
|
|
83
|
private boolean testDistributions() throws SQLException {
|
84
|
System.out.println("Start validate distributions");
|
85
|
boolean success = testDistributionCount();
|
86
|
if (success){
|
87
|
success &= testSingleDistributions(source.getUniqueInteger("SELECT count(*) FROM dr "));
|
88
|
}
|
89
|
return success;
|
90
|
}
|
91
|
|
92
|
private boolean testCommonNames() throws SQLException {
|
93
|
System.out.println("Start validate common names");
|
94
|
boolean success = testCommonNameCount();
|
95
|
if (success){
|
96
|
success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
|
97
|
}
|
98
|
return success;
|
99
|
}
|
100
|
|
101
|
int countSynonyms;
|
102
|
int countIncludedIns;
|
103
|
private boolean testTaxonRelations() throws SQLException {
|
104
|
System.out.println("Start validate taxon relations");
|
105
|
boolean success = testSynonymRelations(); //only count, single record test still missing
|
106
|
success &= testIncludedInRelations(); //only count, single record test still missing
|
107
|
success &= testTotalRelations();
|
108
|
success &= testNameRelations();
|
109
|
return success;
|
110
|
}
|
111
|
|
112
|
private boolean testTotalRelations() {
|
113
|
if (!(countSynonyms < 0 || countIncludedIns < 0)){
|
114
|
int countTotalSrc = countSynonyms + countIncludedIns;
|
115
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
|
116
|
boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
|
117
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origErms);
|
118
|
success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
|
119
|
return success;
|
120
|
}else{
|
121
|
return false;
|
122
|
}
|
123
|
}
|
124
|
|
125
|
private boolean testSynonymRelations() throws SQLException {
|
126
|
|
127
|
int countSrc = source.getUniqueInteger(countSynonymRelation);
|
128
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
|
129
|
boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
|
130
|
// update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently placed%'
|
131
|
// update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'currently held%'
|
132
|
// update Match_RelStat set RelTaxon = 102 where tu_unacceptreason like 'sy%' or tu_unacceptreason like '%jun%syn%'
|
133
|
// update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = '(synonym)'
|
134
|
// update Match_RelStat set RelTaxon = 102 where tu_unacceptreason = 'reverted genus transfer'
|
135
|
// update Match_RelStat set RelTaxon = 103 where tu_unacceptreason like 'misapplied%'
|
136
|
// update Match_RelStat set RelTaxon = 104 where tu_unacceptreason like 'part% synonym%'
|
137
|
// update Match_RelStat set RelTaxon = 106 where tu_unacceptreason = 'heterotypic synonym' or tu_unacceptreason = 'subjective synonym'
|
138
|
// update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%homot%syn%' or tu_unacceptreason = 'objective synonym' synyonym
|
139
|
// update Match_RelStat set RelTaxon = 107 where tu_unacceptreason like '%bas[iy][no]%ny%'
|
140
|
if (success){
|
141
|
//TODO test single synonym relations
|
142
|
// success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
|
143
|
}
|
144
|
countSynonyms = (countSrc == countDest)? countSrc : -1;
|
145
|
return success;
|
146
|
}
|
147
|
|
148
|
private boolean testNameRelations() {
|
149
|
//Name relations
|
150
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
|
151
|
+ " tu_unacceptreason like '%bas[iy][no]%ny%' OR tu_unacceptreason = 'original combination' "
|
152
|
+ " OR tu_unacceptreason = 'Subsequent combination' OR tu_unacceptreason like '%genus transfer%' "
|
153
|
+ " OR tu_unacceptreason = 'genus change' " //1
|
154
|
+ " OR tu_unacceptreason like '%homon%' " // 2
|
155
|
+ " OR tu_unacceptreason like '%spell%' OR tu_unacceptreason like 'lapsus %' " //16
|
156
|
|
157
|
+ ")");
|
158
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
|
159
|
boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
|
160
|
if (success){
|
161
|
//TODO test single name relation
|
162
|
// success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
|
163
|
}
|
164
|
return success;
|
165
|
}
|
166
|
|
167
|
private boolean testIncludedInRelations() {
|
168
|
int countSrc = source.getUniqueInteger(countParentRelation);
|
169
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
|
170
|
boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
|
171
|
if (success){
|
172
|
//TODO test single includedIn relations
|
173
|
// success &= testSingleTaxonRelations(source.getUniqueInteger(countSynonymRelation));
|
174
|
}
|
175
|
countIncludedIns = (countSrc == countDest)? countSrc : -1;
|
176
|
return success;
|
177
|
}
|
178
|
|
179
|
private boolean testTaxa() throws SQLException {
|
180
|
System.out.println("Start validate taxa");
|
181
|
boolean success = testTaxaCount();
|
182
|
//FIXME
|
183
|
if (!success){
|
184
|
success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
|
185
|
}
|
186
|
return success;
|
187
|
}
|
188
|
|
189
|
private boolean testReferences() throws SQLException {
|
190
|
System.out.println("Start validate references");
|
191
|
boolean success = testReferenceCount();
|
192
|
if (success){
|
193
|
success &= testSingleReferences();
|
194
|
}
|
195
|
return success;
|
196
|
}
|
197
|
|
198
|
private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts WHERE ts.tu_id " + moneraFilter;
|
199
|
private boolean testAdditionalTaxonSourcesCount() {
|
200
|
int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
|
201
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
|
202
|
return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
|
203
|
}
|
204
|
|
205
|
private boolean testNotesCount() {
|
206
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
|
207
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
|
208
|
+ " WHERE NOT (NoteCategoryFk = 4 AND LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) ");
|
209
|
boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
|
210
|
|
211
|
countSrc = source.getUniqueInteger("SELECT count(*) FROM tu "
|
212
|
+ " WHERE (tu_marine IS NOT NULL OR tu_brackish IS NOT NULL OR tu_fresh IS NOT NULL OR tu_terrestrial IS NOT NULL) "
|
213
|
+ " AND tu.id " + moneraFilter );
|
214
|
countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
|
215
|
+ " WHERE (NoteCategoryFk = 4 AND LastAction IS NULL) ");
|
216
|
result &= equals("Notes ecology count ", countSrc, countDest, String.valueOf(-1));
|
217
|
|
218
|
countSrc = source.getUniqueInteger("SELECT count(*) FROM links ");
|
219
|
countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
|
220
|
+ " WHERE NoteCategoryFk IN (22,23,24) ");
|
221
|
result &= equals("Notes link count ", countSrc, countDest, String.valueOf(-1));
|
222
|
|
223
|
return result;
|
224
|
}
|
225
|
|
226
|
private boolean testDistributionCount() {
|
227
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM dr ");
|
228
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
|
229
|
return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
|
230
|
}
|
231
|
|
232
|
private boolean testCommonNameCount() {
|
233
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
|
234
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
|
235
|
return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
|
236
|
}
|
237
|
|
238
|
private final String countSynonymRelation = "SELECT count(*) FROM tu syn LEFT JOIN tu acc ON syn.tu_accfinal = acc.id WHERE (syn.id <> acc.id AND syn.tu_accfinal IS NOT NULL AND syn.id <> acc.tu_parent) AND syn.id " + moneraFilter;
|
239
|
private final String countParentRelation = "SELECT count(*)-1 FROM tu syn LEFT JOIN tu acc ON syn.tu_accfinal = acc.id WHERE (syn.id = acc.id OR syn.tu_accfinal IS NULL OR syn.id = acc.tu_parent) AND syn.id " + moneraFilter;
|
240
|
|
241
|
private final String countTaxon = "SELECT count(*) FROM tu WHERE id " + moneraFilter;
|
242
|
private boolean testTaxaCount() {
|
243
|
int countSrc = source.getUniqueInteger(countTaxon);
|
244
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon ");
|
245
|
boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
|
246
|
|
247
|
//NomStatus
|
248
|
countSrc = source.getUniqueInteger("SELECT count(*) FROM tu WHERE id " + moneraFilter + " AND ("
|
249
|
+ " tu_unacceptreason like '%inval%' OR tu_unacceptreason like '%not val%' "
|
250
|
+ " OR tu_unacceptreason like '%illeg%' OR tu_unacceptreason like '%nud%' "
|
251
|
+ " OR tu_unacceptreason like '%rej.%' OR tu_unacceptreason like '%superfl%' "
|
252
|
+ " OR tu_unacceptreason like '%Comb. nov%' OR tu_unacceptreason like '%New name%' "
|
253
|
+ " OR tu_unacceptreason = 'new combination' "
|
254
|
+ " OR tu_status IN (3,5,6,7,8) )");
|
255
|
countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon WHERE NameStatusFk IS NOT NULL ");
|
256
|
result = equals("Taxon name status count ", countSrc, countDest, String.valueOf(-1));
|
257
|
|
258
|
return result;
|
259
|
}
|
260
|
|
261
|
private boolean testSingleTaxa(int n) throws SQLException {
|
262
|
boolean success = true;
|
263
|
ResultSet srcRS = source.getResultSet(""
|
264
|
+ " SELECT t.*, pt.tu_name pt_name, pt.id pId, pt.tu_accfinal pAccId, "
|
265
|
+ " acc.tu_sp as acc_sp, accP.id accPId, "
|
266
|
+ " r.rank_name, st.status_name, "
|
267
|
+ " type.tu_displayname typename, type.tu_authority typeauthor, "
|
268
|
+ " fo.fossil_name, qs.qualitystatus_name "
|
269
|
+ " FROM tu t "
|
270
|
+ " LEFT JOIN tu as pt on t.tu_parent = pt.id "
|
271
|
+ " LEFT JOIN (SELECT DISTINCT rank_id, rank_name FROM ranks WHERE NOT(rank_id = 30 AND rank_name = 'Phylum (Division)' OR rank_id = 40 AND rank_name = 'Subphylum (Subdivision)' OR rank_id = 122 AND rank_name='Subsection')) as r ON t.tu_rank = r.rank_id "
|
272
|
+ " LEFT JOIN tu acc ON acc.id = t.tu_accfinal "
|
273
|
+ " LEFT JOIN tu accP ON acc.tu_parent = accP.id "
|
274
|
+ " LEFT JOIN status st ON st.status_id = t.tu_status "
|
275
|
+ " LEFT JOIN tu type ON type.id = t.tu_typetaxon "
|
276
|
+ " LEFT JOIN fossil fo ON t.tu_fossil = fo.fossil_id "
|
277
|
+ " LEFT JOIN qualitystatus qs ON t.tu_qualitystatus = qs.id "
|
278
|
+ " WHERE t.id " + moneraFilter
|
279
|
+ " ORDER BY CAST(t.id as nvarchar(20)) ");
|
280
|
ResultSet destRS = destination.getResultSet("SELECT t.*, "
|
281
|
+ " pt.GenusOrUninomial p_GenusOrUninomial, pt.InfraGenericEpithet p_InfraGenericEpithet, pt.SpecificEpithet p_SpecificEpithet, "
|
282
|
+ " pt.treeIndex pTreeIndex, pt.IdInSource pIdInSource, "
|
283
|
+ " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
|
284
|
+ " FROM Taxon t "
|
285
|
+ " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
|
286
|
+ " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
|
287
|
+ " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
|
288
|
+ " LEFT JOIN Source s ON s.SourceId = t.SourceFk "
|
289
|
+ " WHERE t."+ origErms
|
290
|
+ " ORDER BY t.IdInSource");
|
291
|
ResultSet srcRsLastAction = source.getResultSet(""
|
292
|
+ " SELECT t.id, s.sessiondate, a.action_name, s.ExpertName "
|
293
|
+ " FROM tu t "
|
294
|
+ " LEFT OUTER JOIN tu_sessions MN ON t.id = MN.tu_id "
|
295
|
+ " LEFT JOIN actions a ON a.id = MN.action_id "
|
296
|
+ " LEFT JOIN sessions s ON s.id = MN.session_id "
|
297
|
+ " ORDER BY CAST(t.id as nvarchar(20)), s.sessiondate DESC, a.id DESC ");
|
298
|
int i = 0;
|
299
|
while (srcRS.next() && destRS.next()){
|
300
|
success &= testSingleTaxon(srcRS, destRS);
|
301
|
success &= testLastAction(srcRsLastAction, destRS, String.valueOf(srcRS.getInt("id")), "Taxon");
|
302
|
i++;
|
303
|
}
|
304
|
success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
|
305
|
return success;
|
306
|
}
|
307
|
|
308
|
private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
309
|
String id = String.valueOf(srcRS.getInt("id"));
|
310
|
//complete
|
311
|
boolean success = equals("Taxon ID", "tu_id: " + srcRS.getInt("id"), destRS.getString("IdInSource"), id);
|
312
|
success &= equals("Taxon source", "ERMS export for PESI", destRS.getString("sourceName"), id);
|
313
|
|
314
|
success &= compareKingdom("Taxon kingdom", srcRS, destRS, id);
|
315
|
success &= equals("Taxon rank fk", srcRS.getString("tu_rank"), destRS.getString("RankFk"), id);
|
316
|
success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name"), srcRS, id), destRS.getString("Rank"), id);
|
317
|
success &= compareNameParts(srcRS, destRS, id);
|
318
|
|
319
|
success &= equals("Taxon websearchname", srcRS.getString("tu_displayname"), destRS.getString("WebSearchName"), id);
|
320
|
//in ERMS displayName and webShowName should be equal as we do not have a correctly formatted nom. ref.
|
321
|
// success &= equals("Taxon WebShowName", srcDisplayName(srcRS), destRS.getString("WebShowName"), id);
|
322
|
success &= equals("Taxon authority", srcRS.getString("tu_authority"), destRS.getString("AuthorString"), id);
|
323
|
// success &= equals("Taxon FullName", srcFullName(srcRS), destRS.getString("FullName"), id);
|
324
|
success &= isNull("NomRefString", destRS, id);
|
325
|
// success &= equals("Taxon DisplayName", srcDisplayName(srcRS), destRS.getString("DisplayName"), id); //according to SQL script same as FullName, no nom.ref. information attached
|
326
|
|
327
|
//TODO nameStatusFk success &= equals("Taxon NameStatusFk", toNameStatus(nullSafeInt(srcRS, "tu_status")),nullSafeInt( destRS,"NameStatusFk"), id);
|
328
|
//TODO nameStatusCache success &= equals("Taxon NameStatusCache", srcRS.getString("status_name"), destRS.getString("NameStatusCache"), id);
|
329
|
|
330
|
success &= equals("Taxon TaxonStatusFk", normalizeTaxonStatusFk(srcRS),nullSafeInt( destRS,"TaxonStatusFk"), id);
|
331
|
success &= equals("Taxon TaxonStatusCache", normalizeTaxonStatusCache(srcRS), destRS.getString("TaxonStatusCache"), id);
|
332
|
|
333
|
success &= equals("Taxon ParentTaxonFk", srcParentTaxonFk(srcRS), destParentIdInSource(destRS), id);
|
334
|
Integer orgigTypeNameFk = nullSafeInt(srcRS, "tu_typetaxon");
|
335
|
success &= equals("Taxon TypeNameFk", orgigTypeNameFk == null? null : "tu_id: " + orgigTypeNameFk, destRS.getString("typeSourceId"), id);
|
336
|
//TODO success &= equals("Taxon TypeFullNameCache", CdmUtils.concat(" ", srcRS.getString("typename"), srcRS.getString("typeauthor")), destRS.getString("TypeFullNameCache"), id);
|
337
|
success &= equals("Taxon QualityStatusFK", nullSafeInt(srcRS, "tu_qualitystatus"),nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
|
338
|
success &= equals("Taxon QualityStatusCache", srcRS.getString("qualitystatus_name"), destRS.getString("QualityStatusCache"), id);
|
339
|
//the >200 taxa having themselves as grandparents are currently still reported as errors (what they are but they are handled during im/export
|
340
|
// success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
|
341
|
success &= equals("Taxon FossilStatusFk", nullSafeInt(srcRS, "tu_fossil"),nullSafeInt( destRS,"FossilStatusFk"), String.valueOf(id));
|
342
|
success &= equals("Taxon FossilStatusCache", srcRS.getString("fossil_name"), destRS.getString("FossilStatusCache"), id);
|
343
|
success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
|
344
|
success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
|
345
|
success &= isNull("ExpertGUID", destRS, id); //only relevant after merge
|
346
|
success &= isNull("ExpertName", destRS, id); //only relevant after merge
|
347
|
success &= isNull("SpeciesExpertGUID", destRS, id); //only relevant after merge
|
348
|
success &= equals("Taxon cache citation", srcRS.getString("cache_citation"), destRS.getString("CacheCitation"), id);
|
349
|
//SpeciesExpertName, LastAction and LastActionDate handled in separate method
|
350
|
success &= isNull("GUID2", destRS, id); //only relevant after merge
|
351
|
success &= isNull("DerivedFromGuid2", destRS, id); //only relevant after merge
|
352
|
return success;
|
353
|
}
|
354
|
|
355
|
private Integer destParentIdInSource(ResultSet destRS) throws SQLException {
|
356
|
String parentIdInSource = destRS.getString("pIdInSource");
|
357
|
if (parentIdInSource == null){
|
358
|
return null;
|
359
|
}else{
|
360
|
String idStr = parentIdInSource.replace("tu_id:", "").trim();
|
361
|
Integer result = Integer.valueOf(idStr);
|
362
|
return result;
|
363
|
}
|
364
|
}
|
365
|
|
366
|
private Integer srcParentTaxonFk(ResultSet srcRS) throws SQLException {
|
367
|
Integer id = nullSafeInt(srcRS,"id");
|
368
|
Integer accId = nullSafeInt(srcRS, "tu_accfinal");
|
369
|
Integer pId = nullSafeInt(srcRS, "pId");
|
370
|
Integer pAccId = nullSafeInt(srcRS, "pAccId");
|
371
|
Integer accPId = nullSafeInt(srcRS, "accPId"); //parent of accepted taxon
|
372
|
|
373
|
if (accId != null && !id.equals(accId)){
|
374
|
if (id.equals(accPId)){
|
375
|
return pId; //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
|
376
|
}else{
|
377
|
return null; //taxon is not accepted
|
378
|
}
|
379
|
}else{
|
380
|
if (id == 1){
|
381
|
return null; //Biota
|
382
|
}else if (pAccId == null){
|
383
|
return pId; //handle parent preliminary as accepted
|
384
|
}else if (id.equals(pAccId)){
|
385
|
return pId; //exceptional handling to avoid recursion mostly for some autonyms and alternate representations
|
386
|
}else{
|
387
|
return pAccId;
|
388
|
}
|
389
|
}
|
390
|
}
|
391
|
|
392
|
private Integer normalizeTaxonStatusFk(ResultSet srcRS) throws SQLException {
|
393
|
int id = srcRS.getInt("id");
|
394
|
Integer accFinal = nullSafeInt(srcRS, "tu_accfinal");
|
395
|
boolean accFinalDiffers = accFinal != null && !accFinal.equals(id);
|
396
|
|
397
|
if (accFinalDiffers){
|
398
|
return PesiTransformer.T_STATUS_SYNONYM; //2
|
399
|
}else{
|
400
|
Integer status = nullSafeInt(srcRS, "tu_status");
|
401
|
if(status == 1){ //accepted
|
402
|
return PesiTransformer.T_STATUS_ACCEPTED;
|
403
|
}else if (status == 6 || status == 8 || status == 10){ //nomen dubium, taxon inquirendum (status uncertain), uncertain
|
404
|
return PesiTransformer.T_STATUS_UNRESOLVED;
|
405
|
}else if (status == 2 || status == 3 || status == 5 || status == 7 || status == 9){ //unaccepted, nomen nudum, alternate representation, temporary name, interim unpublished
|
406
|
return PesiTransformer.T_STATUS_UNACCEPTED;
|
407
|
}
|
408
|
}
|
409
|
//4 does not exist and should not happen
|
410
|
return -1;
|
411
|
}
|
412
|
|
413
|
private String normalizeTaxonStatusCache(ResultSet srcRS) throws SQLException {
|
414
|
Integer status = normalizeTaxonStatusFk(srcRS);
|
415
|
if (status == 1){
|
416
|
return "accepted";
|
417
|
}else if (status == 2){
|
418
|
return "synonym";
|
419
|
}else if (status == 4){
|
420
|
return "pro parte synonym";
|
421
|
}else if (status == 5){
|
422
|
return "unresolved";
|
423
|
}else if (status == 7){
|
424
|
return "unaccepted";
|
425
|
}else{
|
426
|
return "xxx - not yet handled";
|
427
|
}
|
428
|
}
|
429
|
|
430
|
boolean namePartsFirst = true;
|
431
|
private boolean compareNameParts(ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
|
432
|
if (namePartsFirst){
|
433
|
logger.warn("Validation of name parts not fully implemented (difficult). Currently validated via fullname");
|
434
|
namePartsFirst = false;
|
435
|
}
|
436
|
int rankFk = srcRS.getInt("tu_rank");
|
437
|
String genusOrUninomial = null;
|
438
|
String infraGenericEpithet = null;
|
439
|
String specificEpithet = null;
|
440
|
String infraSpecificEpithet = null;
|
441
|
if (rankFk <= 180){
|
442
|
genusOrUninomial = srcRS.getString("tu_name");
|
443
|
}else if (rankFk == 190){
|
444
|
genusOrUninomial = srcRS.getString("pt_name");
|
445
|
infraGenericEpithet = srcRS.getString("tu_name");
|
446
|
//TODO compareNameParts does not work this way
|
447
|
// }else if (rankFk == 220){
|
448
|
// genusOrUninomial = destRS.getString("p_GenusOrUninomial");
|
449
|
// infraGenericEpithet = destRS.getString("p_InfraGenericEpithet");
|
450
|
// specificEpithet = srcRS.getString("tu_name");
|
451
|
}else{
|
452
|
//TODO exception (compare name parts)
|
453
|
return false;
|
454
|
}
|
455
|
boolean result = testEpis(destRS, genusOrUninomial, infraGenericEpithet,
|
456
|
specificEpithet, infraSpecificEpithet, id);
|
457
|
return result;
|
458
|
}
|
459
|
|
460
|
private boolean testEpis(ResultSet destRS, String genusOrUninomial, String infraGenericEpithet, String specificEpithet,
|
461
|
String infraSpecificEpithet, String id) throws SQLException {
|
462
|
boolean result = equals("Taxon genusOrUninomial", genusOrUninomial, destRS.getString("GenusOrUninomial"), id) ;
|
463
|
result &= equals("Taxon infraGenericEpithet", infraGenericEpithet, destRS.getString("InfraGenericEpithet"), id) ;
|
464
|
result &= equals("Taxon specificEpithet", specificEpithet, destRS.getString("SpecificEpithet"), id) ;
|
465
|
result &= equals("Taxon infraSpecificEpithet", infraSpecificEpithet, destRS.getString("InfraSpecificEpithet"), id) ;
|
466
|
return result;
|
467
|
}
|
468
|
|
469
|
private String normalizeRank(String string, ResultSet srcRS, String id) throws SQLException {
|
470
|
String result = string
|
471
|
.replace("Subforma", "Subform")
|
472
|
.replace("Forma", "Form");
|
473
|
int kingdomFk = Integer.valueOf(getSourceKingdomFk(srcRS, id));
|
474
|
if (kingdomFk == 3 || kingdomFk == 4){
|
475
|
result = result.replace("Subphylum", "Subdivision");
|
476
|
result = result.replace("Phylum", "Division");
|
477
|
}
|
478
|
return result;
|
479
|
}
|
480
|
|
481
|
//see also ErmsTaxonImport.getExpectedTitleCache()
|
482
|
private String srcFullName(ResultSet srcRs) throws SQLException {
|
483
|
String result = null;
|
484
|
String epi = srcRs.getString("tu_name");
|
485
|
String display = srcRs.getString("tu_displayname");
|
486
|
String sp = srcRs.getString("tu_sp");
|
487
|
if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //autonym, !animal
|
488
|
String authority = srcRs.getString("tu_authority");
|
489
|
result = srcRs.getString("tu_displayname").replaceFirst(epi+" ", CdmUtils.concat(" ", epi, authority)+" ");
|
490
|
}else{
|
491
|
result = CdmUtils.concat(" ", srcRs.getString("tu_displayname"), srcRs.getString("tu_authority"));
|
492
|
}
|
493
|
return result;
|
494
|
}
|
495
|
|
496
|
private String srcDisplayName(ResultSet srcRs) throws SQLException {
|
497
|
String result = null;
|
498
|
String epi = srcRs.getString("tu_name");
|
499
|
epi = " a" + epi;
|
500
|
String display = "<i>"+srcRs.getString("tu_displayname")+"</i>";
|
501
|
display = display.replace(" var. ", "</i> var. <i>").replace(" f. ", "</i> f. <i>");
|
502
|
String sp = srcRs.getString("tu_sp");
|
503
|
if (display.indexOf(epi) != display.lastIndexOf(epi) && !sp.startsWith("#2#")){ //homonym, animal
|
504
|
result = display.replaceFirst(epi+" ", CdmUtils.concat(" ", " "+epi, srcRs.getString("tu_authority")))+" ";
|
505
|
}else{
|
506
|
result = CdmUtils.concat(" ", display, srcRs.getString("tu_authority"));
|
507
|
}
|
508
|
return result;
|
509
|
}
|
510
|
|
511
|
String lastLastActionId = "-1";
|
512
|
private boolean testLastAction(ResultSet srcRsLastAction, ResultSet destRs, String id, String table) throws SQLException {
|
513
|
try {
|
514
|
boolean success = true;
|
515
|
String srcId = null;
|
516
|
while (srcRsLastAction.next()){
|
517
|
srcId = String.valueOf(srcRsLastAction.getInt("id"));
|
518
|
if (!lastLastActionId.equals(srcId)){
|
519
|
lastLastActionId = srcId;
|
520
|
break;
|
521
|
}
|
522
|
}
|
523
|
if(!id.equals(srcId)){
|
524
|
logger.warn("Last Action SourceIDs are not equal: id: " +id + ", la-id: " + srcId);
|
525
|
}
|
526
|
String destStr = destRs.getString("LastAction");
|
527
|
success &= equals(table + " SpeciesExpertName", srcRsLastAction.getString("ExpertName"), destRs.getString("SpeciesExpertName"), id); //mapping ExpertName => SpeciesExpertName according to SQL script
|
528
|
success &= equals(table + " Last Action", srcRsLastAction.getString("action_name"), destStr == null? null : destStr, id);
|
529
|
success &= equals(table + " Last Action Date", srcRsLastAction.getTimestamp("sessiondate"), destRs.getTimestamp("LastActionDate"), id);
|
530
|
|
531
|
return success;
|
532
|
} catch (Exception e) {
|
533
|
e.printStackTrace();
|
534
|
throw e;
|
535
|
}
|
536
|
}
|
537
|
|
538
|
private boolean compareKingdom(String messageStart, ResultSet srcRS, ResultSet destRS, String id) throws SQLException {
|
539
|
String srcKingdom = getSourceKingdomFk(srcRS, id);
|
540
|
Integer intDest = nullSafeInt(destRS, "KingdomFk");
|
541
|
if (intDest == null){
|
542
|
logger.warn(id +": " + messageStart + " must never be null for destination. Biota needs to be 0, all the rest needs to have >0 int value.");
|
543
|
return false;
|
544
|
}else{
|
545
|
return equals(messageStart, srcKingdom, String.valueOf(intDest), id);
|
546
|
}
|
547
|
}
|
548
|
|
549
|
private String getSourceKingdomFk(ResultSet srcRS, String id) throws SQLException {
|
550
|
String strSrc = srcRS.getString("acc_sp");
|
551
|
if (strSrc == null){
|
552
|
strSrc = srcRS.getString("tu_sp");
|
553
|
}
|
554
|
if (strSrc == null){
|
555
|
if ("1".equals(id)){
|
556
|
strSrc = "0"; //Biota
|
557
|
}else if ("147415".equals(id)){
|
558
|
strSrc = "6"; //Monera is synonym of Bacteria
|
559
|
}else{
|
560
|
strSrc = id;
|
561
|
}
|
562
|
}else{
|
563
|
strSrc = strSrc.substring(1);
|
564
|
strSrc = strSrc.substring(0, strSrc.indexOf("#"));
|
565
|
}
|
566
|
return strSrc;
|
567
|
}
|
568
|
|
569
|
private boolean testSingleTaxonRelations(int n) throws SQLException {
|
570
|
boolean success = true;
|
571
|
ResultSet srcRS = source.getResultSet(""
|
572
|
+ " SELECT t.* "
|
573
|
+ " FROM tu t "
|
574
|
+ " WHERE t.id "+ moneraFilter + " AND tu_accfinal <> id "
|
575
|
+ " ORDER BY CAST(t.id as nvarchar(20)) ");
|
576
|
ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
|
577
|
+ " FROM RelTaxon rel "
|
578
|
+ " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
|
579
|
+ " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
|
580
|
+ " WHERE t1."+origErms+" AND t2." + origErms
|
581
|
+ " ORDER BY t1.IdInSource");
|
582
|
int i = 0;
|
583
|
while (srcRS.next() && destRS.next()){
|
584
|
success &= testSingleTaxonRelation(srcRS, destRS);
|
585
|
i++;
|
586
|
}
|
587
|
success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
|
588
|
return success;
|
589
|
}
|
590
|
|
591
|
private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
592
|
String id = String.valueOf(srcRS.getInt("id"));
|
593
|
boolean success = equals("Taxon relation taxon1", "tu_id: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
|
594
|
success &= equals("Taxon relation taxon2", "tu_id: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
|
595
|
success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
|
596
|
success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
|
597
|
success &= isNull("notes", destRS, id);
|
598
|
//complete if no further relations need to be added
|
599
|
return success;
|
600
|
}
|
601
|
|
602
|
private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
|
603
|
boolean success = true;
|
604
|
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, MN.*, s.*, su.sourceuse_name "
|
605
|
+ " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
|
606
|
+ " LEFT JOIN sources s ON s.id = MN.source_id "
|
607
|
+ " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
|
608
|
+ " WHERE MN.tu_id " + moneraFilter
|
609
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext
|
610
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
|
611
|
+ " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
|
612
|
+ " INNER JOIN Source s ON s.SourceId = ats.SourceFk "
|
613
|
+ " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
|
614
|
+ " WHERE t."+origErms
|
615
|
+ " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
|
616
|
int count = 0;
|
617
|
while (srcRs.next() && destRs.next()){
|
618
|
success &= testSingleAdditionalTaxonSource(srcRs, destRs);
|
619
|
count++;
|
620
|
}
|
621
|
success &= equals("Notes count differs", n, count, "-1");
|
622
|
return success;
|
623
|
}
|
624
|
|
625
|
private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
626
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
|
627
|
boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
628
|
success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id); //currently we use the same id in ERMS and PESI
|
629
|
success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
|
630
|
success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
|
631
|
//TODO some records are still truncated ~ >820 characters
|
632
|
success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
|
633
|
success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
|
634
|
//Complete
|
635
|
return success;
|
636
|
}
|
637
|
|
638
|
private boolean testSingleNotes(int n) throws SQLException {
|
639
|
boolean success = true;
|
640
|
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as nvarchar(20)) tuId, no.*, l.LanName "
|
641
|
+ " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
|
642
|
+ " LEFT JOIN languages l ON l.LanID = no.lan_id "
|
643
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable "); //, no.note (not possible because ntext
|
644
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
|
645
|
+ " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
|
646
|
+ " LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
|
647
|
+ " LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
|
648
|
+ " WHERE t." + origErms
|
649
|
+ " AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
|
650
|
+ " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1 ");
|
651
|
int count = 0;
|
652
|
ResultSet srcRsLastAction = source.getResultSet(""
|
653
|
+ " SELECT no.id, s.sessiondate, a.action_name, s.ExpertName "
|
654
|
+ " FROM notes no "
|
655
|
+ " INNER JOIN tu ON tu.id = no.tu_id "
|
656
|
+ " LEFT JOIN languages l ON l.LanID = no.lan_id"
|
657
|
+ " LEFT JOIN notes_sessions MN ON no.id = MN.note_id "
|
658
|
+ " LEFT JOIN actions a ON a.id = MN.action_id "
|
659
|
+ " LEFT JOIN sessions s ON s.id = MN.session_id "
|
660
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), no.type, no.noteSortable, s.sessiondate DESC, a.id DESC ");
|
661
|
|
662
|
while (srcRs.next() && destRs.next()){
|
663
|
success &= testSingleNote(srcRs, destRs);
|
664
|
success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Note");
|
665
|
count++;
|
666
|
}
|
667
|
success &= equals("Notes count differs", n, count, "-1");
|
668
|
return success;
|
669
|
}
|
670
|
|
671
|
private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
672
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
|
673
|
boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
674
|
success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
|
675
|
success &= isNull("Note_2", destRs, id);
|
676
|
success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
|
677
|
success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
|
678
|
success &= isNull("Region", destRs, id);
|
679
|
success &= isNull("SpeciesExpertGUID", destRs, id);
|
680
|
//SpeciesExpertName, LastAction, LastActionDate handled in separate method
|
681
|
//complete
|
682
|
return success;
|
683
|
}
|
684
|
|
685
|
private String normalizeNoteCatCache(String string) {
|
686
|
return StringUtils.capitalize(string)
|
687
|
.replace("Original Combination", "Original combination")
|
688
|
.replace("Taxonomic remark", "Taxonomic Remark");
|
689
|
}
|
690
|
|
691
|
private boolean testSingleDistributions(int n) throws SQLException {
|
692
|
boolean success = true;
|
693
|
ResultSet srcRs = source.getResultSet("SELECT CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)) tuId,"
|
694
|
+ " gu.gazetteer_id, dr.*, gu.id guId, gu.gu_name "
|
695
|
+ " FROM dr INNER JOIN tu ON dr.tu_id = tu.id "
|
696
|
+ " LEFT JOIN gu ON gu.id = dr.gu_id "
|
697
|
+ " ORDER BY CAST(ISNULL(tu.tu_accfinal, tu.id) as nvarchar(20)), gu.gazetteer_id, gu.gu_name, dr.noteSortable "); //, dr.note (not possible because ntext
|
698
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaERMSGazetteerId, oc.*, a.AreaName "
|
699
|
+ " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
|
700
|
+ " LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
|
701
|
+ " WHERE t." + origErms
|
702
|
+ " ORDER BY t.IdInSource, a.AreaERMSGazetteerId, a.AreaName, oc.Notes ");
|
703
|
ResultSet srcRsLastAction = source.getResultSet(""
|
704
|
+ " SELECT dr.id, s.sessiondate, a.action_name, s.ExpertName "
|
705
|
+ " FROM dr "
|
706
|
+ " INNER JOIN tu ON tu.id = dr.tu_id "
|
707
|
+ " LEFT JOIN gu ON gu.id = dr.gu_id "
|
708
|
+ " LEFT JOIN dr_sessions MN ON dr.id = MN.dr_id "
|
709
|
+ " LEFT JOIN actions a ON a.id = MN.action_id "
|
710
|
+ " LEFT JOIN sessions s ON s.id = MN.session_id "
|
711
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), gu.gazetteer_id, gu.gu_name, s.sessiondate DESC, a.id DESC ");
|
712
|
int count = 0;
|
713
|
while (srcRs.next() && destRs.next()){
|
714
|
success &= testSingleDistribution(srcRs, destRs);
|
715
|
//there are >1000 duplicates in dr, therefore this creates lots of warnings
|
716
|
success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "Distribution");
|
717
|
count++;
|
718
|
}
|
719
|
success &= equals("Distribution count differs", n, count, "-1");
|
720
|
return success;
|
721
|
}
|
722
|
|
723
|
private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
724
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("gu_name"));
|
725
|
boolean success = equals("Distribution taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
726
|
success &= equals("Distribution gazetteer_id ", srcRs.getString("gazetteer_id"), destRs.getString("AreaERMSGazetteerId"), id);
|
727
|
success &= equals("Distribution area name ", srcRs.getString("gu_name"), destRs.getString("AreaName"), id);
|
728
|
success &= equals("Distribution area name cache", srcRs.getString("gu_name"), destRs.getString("AreaNameCache"), id);
|
729
|
success &= equals("Distribution OccurrenceStatusFk", 1, destRs.getInt("OccurrenceStatusFk"), id);
|
730
|
success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
|
731
|
//TODO see comments
|
732
|
success &= isNull("SourceFk", destRs, id); //sources should be moved to extra table only, check with script and PESI 2014 (=> has values for ERMS)
|
733
|
success &= isNull("SourceCache", destRs, id); //sources should be moved to extra table, check with script and PESI 2014 (=> has values for ERMS)
|
734
|
success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
|
735
|
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in ERMS
|
736
|
//SpeciesExpertName,LastAction,LastActionDate handled in separate method
|
737
|
return success;
|
738
|
}
|
739
|
|
740
|
private boolean testSingleCommonNames(int n) throws SQLException {
|
741
|
boolean success = true;
|
742
|
ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
|
743
|
+ " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
|
744
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
|
745
|
ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
|
746
|
+ " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
|
747
|
+ " WHERE t." + origErms
|
748
|
+ " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate "); //sorting also lastActionDate results in a minimum of exact duplicate problems
|
749
|
ResultSet srcRsLastAction = source.getResultSet(""
|
750
|
+ " SELECT v.id, s.sessiondate, a.action_name, s.ExpertName "
|
751
|
+ " FROM vernaculars v "
|
752
|
+ " INNER JOIN tu ON tu.id = v.tu_id "
|
753
|
+ " LEFT JOIN languages l ON l.LanID = v.lan_id"
|
754
|
+ " LEFT JOIN vernaculars_sessions MN ON v.id = MN.vernacular_id "
|
755
|
+ " LEFT JOIN actions a ON a.id = MN.action_id "
|
756
|
+ " LEFT JOIN sessions s ON s.id = MN.session_id "
|
757
|
+ " ORDER BY CAST(tu.id as nvarchar(20)), ISNULL([639_3],[639_2]), v.vername, v.id, s.sessiondate DESC, a.id DESC ");
|
758
|
int count = 0;
|
759
|
while (srcRs.next() && destRs.next()){
|
760
|
success &= testSingleCommonName(srcRs, destRs);
|
761
|
success &= testLastAction(srcRsLastAction, destRs, String.valueOf(srcRs.getInt("id")), "CommonName");
|
762
|
count++;
|
763
|
}
|
764
|
success &= equals("Common name count differs", n, count, "-1");
|
765
|
return success;
|
766
|
}
|
767
|
|
768
|
boolean prefer639_3 = true;
|
769
|
String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
|
770
|
String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
|
771
|
|
772
|
private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
773
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
|
774
|
boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
775
|
success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
|
776
|
success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
|
777
|
success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
|
778
|
//TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
|
779
|
success &= isNull("Region", destRs, id); //region does not seem to exist in ERMS
|
780
|
//TODO see comments
|
781
|
// success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014
|
782
|
// success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014
|
783
|
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in ERMS
|
784
|
//SpeciesExpertName,LastAction,LastActionDate handled in separate method
|
785
|
//complete
|
786
|
return success;
|
787
|
}
|
788
|
|
789
|
private String normalizeLang(String string) {
|
790
|
if ("Spanish".equals(string)){
|
791
|
return "Spanish, Castillian";
|
792
|
}else if ("Modern Greek (1453-)".equals(string)){
|
793
|
return "Greek";
|
794
|
}else if ("Malay (individual language)".equals(string)){
|
795
|
return "Malay";
|
796
|
}else if ("Swahili (individual language)".equals(string)){
|
797
|
return "Swahili";
|
798
|
}
|
799
|
|
800
|
return string;
|
801
|
}
|
802
|
|
803
|
private String getLanguageIso(ResultSet destRs) throws SQLException {
|
804
|
String result = destRs.getString(preferredISO639);
|
805
|
if (result == null){
|
806
|
result = destRs.getString(alternativeISO639);
|
807
|
}
|
808
|
return result;
|
809
|
}
|
810
|
|
811
|
private boolean testSingleReferences() throws SQLException {
|
812
|
boolean success = true;
|
813
|
ResultSet srcRS = source.getResultSet("SELECT s.* FROM sources s ORDER BY s.id ");
|
814
|
ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
|
815
|
+ " WHERE s." + origErms
|
816
|
+ " ORDER BY s.RefIdInSource "); // +1 for the source reference "erms" but this has no OriginalDB
|
817
|
while (srcRS.next() && destRS.next()){
|
818
|
success &= testSingleReference(srcRS, destRS);
|
819
|
}
|
820
|
return success;
|
821
|
}
|
822
|
|
823
|
private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
824
|
String id = String.valueOf(srcRS.getInt("id"));
|
825
|
boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
|
826
|
success &= equals("Reference IMIS_id ", srcRS.getString("imis_id"), destRS.getString("IMIS_Id"), id);
|
827
|
success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("source_type")), destRS.getInt("SourceCategoryFk"), id);
|
828
|
success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("source_type")), destRS.getString("SourceCategoryCache"), id);
|
829
|
success &= equals("Reference name ", srcRS.getString("source_name"), destRS.getString("Name"), id);
|
830
|
success &= equals("Reference abstract ", srcRS.getString("source_abstract"), destRS.getString("Abstract"), id);
|
831
|
success &= equals("Reference title ", srcRS.getString("source_title"), destRS.getString("Title"), id);
|
832
|
success &= equals("Reference author string ", srcRS.getString("source_author"), destRS.getString("AuthorString"), id);
|
833
|
success &= equals("Reference year ", normalizeYear(srcRS.getString("source_year")), destRS.getString("RefYear"), id);
|
834
|
success &= isNull("NomRefCache", destRS, id); //for ERMS no other value was found in 2014 value
|
835
|
success &= equals("Reference link ", srcRS.getString("source_link"), destRS.getString("Link"), id);
|
836
|
success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
|
837
|
//complete
|
838
|
return success;
|
839
|
}
|
840
|
|
841
|
private Integer convertSourceTypeFk(String sourceType) {
|
842
|
if (sourceType == null){
|
843
|
return null;
|
844
|
}else if ("d".equals(sourceType)){
|
845
|
return 4;
|
846
|
}else if ("e".equals(sourceType)){
|
847
|
return 5;
|
848
|
}else if ("p".equals(sourceType)){
|
849
|
return 11;
|
850
|
}
|
851
|
return null;
|
852
|
}
|
853
|
private String convertSourceTypeCache(String sourceType) {
|
854
|
if (sourceType == null){
|
855
|
return null;
|
856
|
}else if ("d".equals(sourceType)){
|
857
|
return "database";
|
858
|
}else if ("e".equals(sourceType)){
|
859
|
return "informal reference";
|
860
|
}else if ("p".equals(sourceType)){
|
861
|
return "publication";
|
862
|
}
|
863
|
return null;
|
864
|
}
|
865
|
|
866
|
private boolean testReferenceCount() {
|
867
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM sources ");
|
868
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origErms); // +1 for the source reference "erms" but this has no OriginalDB
|
869
|
boolean success = equals("Reference count ", countSrc, countDest, "-1");
|
870
|
return success;
|
871
|
}
|
872
|
|
873
|
//NOTE: there could be better parsing of source_year during import, this may also need better normalizing in the database
|
874
|
private String normalizeYear(String yearStr) {
|
875
|
if (StringUtils.isBlank(yearStr)){
|
876
|
return yearStr;
|
877
|
}
|
878
|
yearStr = yearStr.trim();
|
879
|
if (yearStr.matches("\\d{4}-\\d{2}")){
|
880
|
yearStr = yearStr.substring(0, 5)+yearStr.substring(0, 2)+yearStr.substring(5);
|
881
|
}
|
882
|
if (yearStr.equals("20 Mar 1891")){
|
883
|
yearStr = "20.3.1891";
|
884
|
}
|
885
|
if (yearStr.equals("July 1900")){
|
886
|
yearStr = "7.1900";
|
887
|
}
|
888
|
return yearStr;
|
889
|
}
|
890
|
|
891
|
private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
|
892
|
if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
|
893
|
String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+ srcDate + "; Destination: " + destDate;
|
894
|
logger.warn(message);
|
895
|
return false;
|
896
|
}else{
|
897
|
logger.info(messageStart + " were equal: " + srcDate);
|
898
|
return true;
|
899
|
}
|
900
|
}
|
901
|
|
902
|
private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
|
903
|
String strId = id.equals("-1")? "": (id+ ": ");
|
904
|
if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
|
905
|
String message = strId+ messageStart + " must be equal, but was not.\n Source: "+ nSrc + "; Destination: " + nDest;
|
906
|
logger.warn(message);
|
907
|
return false;
|
908
|
}else{
|
909
|
logger.info(strId + messageStart + " were equal: " + nSrc);
|
910
|
return true;
|
911
|
}
|
912
|
}
|
913
|
|
914
|
//** ************* MAIN ********************************************/
|
915
|
|
916
|
public static void main(String[] args){
|
917
|
PesiErmsValidator validator = new PesiErmsValidator();
|
918
|
validator.invoke(defaultSource, defaultDestination);
|
919
|
System.exit(0);
|
920
|
}
|
921
|
}
|