1
|
/**
|
2
|
* Copyright (C) 2019 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.app.pesi.validate;
|
10
|
|
11
|
import java.sql.ResultSet;
|
12
|
import java.sql.SQLException;
|
13
|
import java.sql.Timestamp;
|
14
|
import java.time.LocalDate;
|
15
|
import java.util.UUID;
|
16
|
|
17
|
import org.apache.commons.lang.StringUtils;
|
18
|
import org.apache.log4j.Logger;
|
19
|
|
20
|
import eu.etaxonomy.cdm.app.common.CdmDestinations;
|
21
|
import eu.etaxonomy.cdm.app.common.PesiDestinations;
|
22
|
import eu.etaxonomy.cdm.app.pesi.EuroMedSourceActivator;
|
23
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
24
|
import eu.etaxonomy.cdm.common.UTF8;
|
25
|
import eu.etaxonomy.cdm.database.ICdmDataSource;
|
26
|
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
|
27
|
import eu.etaxonomy.cdm.io.common.Source;
|
28
|
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
|
29
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
30
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
|
31
|
|
32
|
/**
|
33
|
* Tests the E+M -> PESI pipeline by comparing the source DB with destination PESI DB.
|
34
|
*
|
35
|
* @author a.mueller
|
36
|
* @since 08.10.2019
|
37
|
*/
|
38
|
public class PesiEuroMedValidator extends PesiValidatorBase {
|
39
|
|
40
|
private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
|
41
|
|
42
|
private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
|
43
|
// private static final ICdmDataSource defaultSource = CdmDestinations.cdm_pesi2019_final();
|
44
|
private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
|
45
|
// private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
|
46
|
|
47
|
boolean doReferences = false;
|
48
|
boolean doTaxa = true;
|
49
|
boolean doTaxRels = false;
|
50
|
boolean doDistributions = false;
|
51
|
boolean doCommonNames = false;
|
52
|
boolean doNotes = false;
|
53
|
boolean doAdditionalTaxonSources = false;
|
54
|
|
55
|
private Source source = new Source(defaultSource);
|
56
|
private Source destination = defaultDestination;
|
57
|
|
58
|
private String origEuroMed = "OriginalDB = 'E+M' ";
|
59
|
|
60
|
public void invoke(Source source, Source destination){
|
61
|
logger.warn("Validate destination " + destination.getDatabase());
|
62
|
boolean success = true;
|
63
|
try {
|
64
|
this.source = source;
|
65
|
this.destination = destination;
|
66
|
success &= testReferences();
|
67
|
success &= testTaxa();
|
68
|
success &= testTaxonRelations();
|
69
|
success &= testDistributions();
|
70
|
success &= testCommonNames();
|
71
|
success &= testNotes();
|
72
|
success &= testAdditionalTaxonSources();
|
73
|
} catch (Exception e) {
|
74
|
e.printStackTrace();
|
75
|
success = false;
|
76
|
}
|
77
|
//TBC
|
78
|
System.out.println("end validation " + (success? "":"NOT ") + "successful.");
|
79
|
}
|
80
|
|
81
|
private boolean testAdditionalTaxonSources() throws SQLException {
|
82
|
if (!doAdditionalTaxonSources){
|
83
|
System.out.println("Ignore validate additional taxon sources");
|
84
|
return true;
|
85
|
}
|
86
|
System.out.println("Start validate additional taxon sources");
|
87
|
boolean success = testAdditionalTaxonSourcesCount();
|
88
|
if (success){
|
89
|
success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
|
90
|
}
|
91
|
return success;
|
92
|
}
|
93
|
|
94
|
private boolean testNotes() throws SQLException {
|
95
|
if (!doNotes){
|
96
|
System.out.println("Ignore validate notes");
|
97
|
return true;
|
98
|
}
|
99
|
System.out.println("Start validate notes");
|
100
|
boolean success = testNotesCount();
|
101
|
if (success){
|
102
|
success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
|
103
|
}
|
104
|
return success;
|
105
|
}
|
106
|
|
107
|
private boolean testDistributions() throws SQLException {
|
108
|
if (!doDistributions){
|
109
|
System.out.println("Ignore validate distributions");
|
110
|
return true;
|
111
|
}
|
112
|
System.out.println("Start validate distributions");
|
113
|
boolean success = testDistributionCount();
|
114
|
if (!success){
|
115
|
success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
|
116
|
}
|
117
|
return success;
|
118
|
}
|
119
|
|
120
|
private boolean testCommonNames() throws SQLException {
|
121
|
if (!doCommonNames){
|
122
|
System.out.println("Ignore validate common names");
|
123
|
return true;
|
124
|
}
|
125
|
System.out.println("Start validate common names");
|
126
|
boolean success = testCommonNameCount();
|
127
|
if (success){
|
128
|
success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
|
129
|
}
|
130
|
return success;
|
131
|
}
|
132
|
|
133
|
int countSynonyms;
|
134
|
int countIncludedIns;
|
135
|
private boolean testTaxonRelations() throws SQLException {
|
136
|
if (!doTaxRels){
|
137
|
System.out.println("Ignore validate taxon relations");
|
138
|
return true;
|
139
|
}
|
140
|
System.out.println("Start validate taxon relations");
|
141
|
boolean success = testSynonymRelations();
|
142
|
success &= testIncludedInRelations();
|
143
|
success &= testTotalRelations();
|
144
|
success &= testNameRelations();
|
145
|
return success;
|
146
|
}
|
147
|
|
148
|
private boolean testTotalRelations() {
|
149
|
if (!(countSynonyms < 0 || countIncludedIns < 0)){
|
150
|
int countTotalSrc = countSynonyms + countIncludedIns;
|
151
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
|
152
|
boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
|
153
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origEuroMed);
|
154
|
success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
|
155
|
return success;
|
156
|
}else{
|
157
|
return false;
|
158
|
}
|
159
|
}
|
160
|
|
161
|
private final String countSynonymRelation = "SELECT count(*) FROM TaxonBase syn LEFT JOIN TaxonBase acc ON syn.acceptedTaxon_id = acc.id WHERE syn.publish = 1 AND acc.publish = 1 ";
|
162
|
private boolean testSynonymRelations() throws SQLException {
|
163
|
|
164
|
int countSrc = source.getUniqueInteger(countSynonymRelation);
|
165
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
|
166
|
boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
|
167
|
if (success){
|
168
|
//TODO test single synonym relations
|
169
|
success &= testSingleSynonymRelations(source.getUniqueInteger(countSynonymRelation));
|
170
|
}
|
171
|
countSynonyms = (countSrc == countDest)? countSrc : -1;
|
172
|
return success;
|
173
|
}
|
174
|
|
175
|
private boolean testSingleSynonymRelations(int n) throws SQLException {
|
176
|
boolean success = true;
|
177
|
ResultSet srcRS = source.getResultSet(""
|
178
|
+ " SELECT t.id tid, pt.id pid "
|
179
|
+ " FROM TaxonNode tn "
|
180
|
+ " INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
|
181
|
+ " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
|
182
|
+ " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
|
183
|
+ " WHERE t.publish = 1 && pt.publish = 1 "
|
184
|
+ " ORDER BY CAST(tb.id as char(20)) ");
|
185
|
|
186
|
ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
|
187
|
+ " FROM RelTaxon rel "
|
188
|
+ " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
|
189
|
+ " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
|
190
|
+ " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk > 101 "
|
191
|
+ " ORDER BY t1.IdInSource");
|
192
|
int i = 0;
|
193
|
while (srcRS.next() && destRS.next()){
|
194
|
success &= testSingleSynonymRelation(srcRS, destRS);
|
195
|
i++;
|
196
|
}
|
197
|
success &= equals("Synonym relation count for single compare", n, i, String.valueOf(-1));
|
198
|
return success;
|
199
|
}
|
200
|
|
201
|
private boolean testSingleSynonymRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
202
|
String id = String.valueOf(srcRS.getInt("id"));
|
203
|
boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
|
204
|
success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("tu_accfinal"), destRS.getString("t2Id"), id);
|
205
|
success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
|
206
|
success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
|
207
|
//TODO enable after next import
|
208
|
// success &= isNull("notes", destRS);
|
209
|
//complete if no further relations need to added
|
210
|
return success;
|
211
|
}
|
212
|
|
213
|
private boolean testNameRelations() {
|
214
|
//Name relations
|
215
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM NameRelationship WHERE ("
|
216
|
+ " 1=1 "
|
217
|
+ ")");
|
218
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
|
219
|
boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
|
220
|
if (success){
|
221
|
//TODO test single name relation
|
222
|
// success &= testSingleNameRelations(source.getUniqueInteger(countSynonymRelation));
|
223
|
}
|
224
|
return success;
|
225
|
}
|
226
|
|
227
|
private final String countParentRelation = "SELECT count(*) "
|
228
|
+ " FROM TaxonNode tn "
|
229
|
+ " INNER JOIN TaxonBase tb ON tn.taxon_id = tb.id "
|
230
|
+ " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
|
231
|
+ " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
|
232
|
+ " WHERE tb.publish = 1 && pt.publish = 1 ";
|
233
|
|
234
|
private boolean testIncludedInRelations() throws SQLException {
|
235
|
int countSrc = source.getUniqueInteger(countParentRelation);
|
236
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
|
237
|
boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
|
238
|
if (success){
|
239
|
success &= testSingleTaxonRelations(source.getUniqueInteger(countParentRelation));
|
240
|
}
|
241
|
countIncludedIns = (countSrc == countDest)? countSrc : -1;
|
242
|
return success;
|
243
|
}
|
244
|
|
245
|
private boolean testTaxa() throws SQLException {
|
246
|
if (!doTaxa){
|
247
|
System.out.println("Ignore validate taxa");
|
248
|
return true;
|
249
|
}
|
250
|
System.out.println("Start validate taxa");
|
251
|
boolean success = testTaxaCount();
|
252
|
if (success){
|
253
|
success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
|
254
|
}
|
255
|
return success;
|
256
|
}
|
257
|
|
258
|
String countReferencesStr = "SELECT count(*) FROM reference ";
|
259
|
private boolean testReferences() throws SQLException {
|
260
|
if (!doReferences){
|
261
|
System.out.println("Ignore validate references");
|
262
|
return true;
|
263
|
}
|
264
|
System.out.println("Start validate references");
|
265
|
boolean success = testReferenceCount();
|
266
|
if (success){
|
267
|
success &= testSingleReferences(source.getUniqueInteger(countReferencesStr));
|
268
|
}
|
269
|
return success;
|
270
|
}
|
271
|
|
272
|
private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts ";
|
273
|
private boolean testAdditionalTaxonSourcesCount() {
|
274
|
int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
|
275
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
|
276
|
return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
|
277
|
}
|
278
|
|
279
|
private boolean testNotesCount() {
|
280
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
|
281
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
|
282
|
+ " WHERE (1=1) ");
|
283
|
boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
|
284
|
|
285
|
return result;
|
286
|
}
|
287
|
|
288
|
private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
|
289
|
+ "'111bdf38-7a32-440a-9808-8af1c9e54b51'," //E+M
|
290
|
//Former UUSR
|
291
|
+ "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
|
292
|
+ "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
|
293
|
+ "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
|
294
|
private String distributionCountSQL = "SELECT count(*) as n "
|
295
|
+ " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
|
296
|
+ " LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
|
297
|
+ " LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
|
298
|
+ distributionCountWhere;
|
299
|
private boolean testDistributionCount() {
|
300
|
int countSrc = source.getUniqueInteger(distributionCountSQL);
|
301
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
|
302
|
return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
|
303
|
}
|
304
|
|
305
|
private boolean testCommonNameCount() {
|
306
|
int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
|
307
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
|
308
|
return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
|
309
|
}
|
310
|
|
311
|
private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
|
312
|
private final String destTaxonFilter = "(t.SourceFk IS NOT NULL OR t.AuthorString like 'auct.%' OR t.AuthorString like 'sensu %')";
|
313
|
private boolean testTaxaCount() {
|
314
|
int countSrc = source.getUniqueInteger(countTaxon);
|
315
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE "+ destTaxonFilter);
|
316
|
boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
|
317
|
return result;
|
318
|
}
|
319
|
|
320
|
private boolean testSingleTaxa(int n) throws SQLException {
|
321
|
boolean success = true;
|
322
|
ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, pt.id parentId, "
|
323
|
+ " tn.rank_id, rank.titleCache rank_name, "
|
324
|
+ " sec.titleCache secTitle, secAut.titleCache secAutTitle, "
|
325
|
+ " tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
|
326
|
+ " tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, tn.fullTitleCache nameFullTitleCache, "
|
327
|
+ " tb.DTYPE taxStatus, tb.titleCache, tb.appendedPhrase tbAppendedPhrase, tb.sec_id secId, "
|
328
|
+ " taxRelType.uuid taxRelTypeUuid, tr.relatedTo_id relToTaxonId, "
|
329
|
+ " nsType.id nsId, nsType.idInVocabulary nsTitle, "
|
330
|
+ " typeName_id, typeName.titleCache typeFullNameCache, "
|
331
|
+ " CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
|
332
|
+ " CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
|
333
|
+ " FROM TaxonBase tb "
|
334
|
+ " LEFT JOIN TaxonName tn on tb.name_id = tn.id "
|
335
|
+ " LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
|
336
|
+ " LEFT JOIN Reference sec ON sec.id = tb.sec_id "
|
337
|
+ " LEFT JOIN AgentBase secAut ON secAut.id = sec.authorship_id "
|
338
|
+ " LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
|
339
|
+ " LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
|
340
|
+ " LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
|
341
|
+ " LEFT JOIN TaxonName_TypeDesignationBase typeMN ON typeMN.TaxonName_id = tn.id "
|
342
|
+ " LEFT JOIN TypeDesignationBase td ON td.id = typeMN.typedesignations_id "
|
343
|
+ " LEFT JOIN TaxonName typeName ON typeName.id = td.typeName_id "
|
344
|
+ " LEFT JOIN TaxonNode n ON n.taxon_id = tb.id "
|
345
|
+ " LEFT JOIN TaxonNode ptn ON n.parent_id = ptn.id "
|
346
|
+ " LEFT JOIN TaxonBase pt ON pt.id = ptn.taxon_id AND pt.publish = 1 "
|
347
|
+ " LEFT JOIN TaxonRelationship tr ON tr.relatedFrom_id = tb.id "
|
348
|
+ " LEFT JOIN TaxonBase tbRelTo ON tr.relatedTo_id = tbRelTo.id "
|
349
|
+ " LEFT JOIN DefinedTermBase taxRelType ON taxRelType.id = tr.type_id"
|
350
|
+ " WHERE tb.publish = 1 "
|
351
|
+ " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
|
352
|
+ " tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
|
353
|
+ " tn.nameCache, tn.authorshipCache, tn.titleCache, "
|
354
|
+ " tb.DTYPE, tb.updated, tb.created " //for duplicates caused by >1 name status
|
355
|
+ " ORDER BY tid, GUID, lastActionDate ");
|
356
|
ResultSet destRS = destination.getResultSet("SELECT t.*, "
|
357
|
+ " pt.treeIndex pTreeIndex, pt.IdInSource parentSourceId, " //not needed
|
358
|
+ " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
|
359
|
+ " FROM Taxon t "
|
360
|
+ " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
|
361
|
+ " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
|
362
|
+ " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
|
363
|
+ " LEFT JOIN Source s ON s.SourceId = t.SourceFk "
|
364
|
+ " WHERE t."+ origEuroMed + " AND " + destTaxonFilter //FIXME remove SourceFk filter is only preliminary for first check
|
365
|
+ " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
|
366
|
int i = 0;
|
367
|
logger.error("remove SourceFk filter is only preliminary for first check");
|
368
|
while (srcRS.next() && destRS.next()){
|
369
|
success &= testSingleTaxon(srcRS, destRS);
|
370
|
i++;
|
371
|
}
|
372
|
success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
|
373
|
return success;
|
374
|
}
|
375
|
|
376
|
private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
377
|
String id = String.valueOf(srcRS.getInt("tid"));
|
378
|
//TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
|
379
|
boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
|
380
|
success &= equals("Taxon source", makeSource(srcRS), destRS.getString("sourceName"), id);
|
381
|
|
382
|
success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
|
383
|
//difficult to test success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
|
384
|
success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
|
385
|
success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
|
386
|
success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
|
387
|
success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
|
388
|
success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
|
389
|
|
390
|
success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
|
391
|
//TODO success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
|
392
|
success &= equals("Taxon authority", makeAuthorship(srcRS), destRS.getString("AuthorString"), id);
|
393
|
success &= equals("Taxon FullName", makeFullName(srcRS), destRS.getString("FullName"), id);
|
394
|
success &= equals("Taxon NomRefString", makeNomRefString(srcRS), destRS.getString("NomRefString"), id);
|
395
|
// success &= equals("Taxon DisplayName", makeDisplayName(srcRS), destRS.getString("DisplayName"), id); //in ERMS according to SQL script same as FullName, no nom.ref. information attached
|
396
|
//difficult to test success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
|
397
|
success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
|
398
|
|
399
|
//TODO mostly Taxonomically Valueless
|
400
|
// success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), nullSafeInt( destRS,"TaxonStatusFk"), id);
|
401
|
// success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), destRS.getString("TaxonStatusCache"), id);
|
402
|
|
403
|
success &= equals("Taxon ParentTaxonFk", nullSafeInt(srcRS, "parentId"), nullSafeInt(destRS, "ParentTaxonFk"), id);
|
404
|
|
405
|
Integer origTypeNameFk = nullSafeInt(srcRS, "typeName_id");
|
406
|
success &= equals("Taxon TypeNameFk", origTypeNameFk == null? null : "NameId: " + origTypeNameFk, destRS.getString("typeSourceId"), id);
|
407
|
success &= equals("Taxon TypeFullNameCache", srcRS.getString("typeFullNameCache"), destRS.getString("TypeFullNameCache"), id);
|
408
|
//according to SQL always constant, could be changed in future
|
409
|
success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
|
410
|
success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
|
411
|
success &= testTreeIndex(destRS, "TreeIndex", "pTreeIndex", id);
|
412
|
success &= isNull("FossilStatusFk", destRS, id);
|
413
|
success &= isNull("FossilStatusCache", destRS, id);
|
414
|
success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
|
415
|
success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
|
416
|
success &= isNull("ExpertGUID", destRS, id); //according to SQL + PESI2014
|
417
|
success &= isNull("SpeciesExpertGUID", destRS, id);
|
418
|
//ExpertName = SpeciesExpertName in E+M according to SQL script, 4689x NULL
|
419
|
success &= equals("Taxon ExpertName", makeExpertName(srcRS), destRS.getString("ExpertName"), id);
|
420
|
success &= equals("Taxon SpeciesExpertName", makeExpertName(srcRS), destRS.getString("SpeciesExpertName"), id);
|
421
|
//FIXME !! success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
|
422
|
success &= equals("Taxon Last Action", srcRS.getString("lastAction"), destRS.getString("LastAction"), id);
|
423
|
success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"), destRS.getTimestamp("LastActionDate"), id);
|
424
|
|
425
|
success &= isNull("GUID2", destRS, id); //only relevant after merge
|
426
|
success &= isNull("DerivedFromGuid2", destRS, id); //only relevant after merge
|
427
|
return success;
|
428
|
}
|
429
|
|
430
|
private String makeExpertName(ResultSet srcRs) throws SQLException {
|
431
|
String autStr = srcRs.getString("secAutTitle");
|
432
|
if (autStr != null){
|
433
|
return autStr;
|
434
|
}else{
|
435
|
return srcRs.getString("secTitle");
|
436
|
}
|
437
|
}
|
438
|
|
439
|
private String makeSource(ResultSet srcRs) throws SQLException {
|
440
|
String secStr = srcRs.getString("secTitle");
|
441
|
if (secStr == null){
|
442
|
return EuroMedSourceActivator.sourceReferenceTitle;
|
443
|
}else{
|
444
|
return secStr;
|
445
|
}
|
446
|
}
|
447
|
|
448
|
private String makeAuthorship(ResultSet srcRs) throws SQLException {
|
449
|
boolean isMisapplied = isMisapplied(srcRs);
|
450
|
if (isMisapplied){
|
451
|
String result = getMisappliedAuthor(srcRs).trim();
|
452
|
return result;
|
453
|
}else{
|
454
|
return srcRs.getString("authorshipCache");
|
455
|
}
|
456
|
}
|
457
|
|
458
|
private String makeFullName(ResultSet srcRs) throws SQLException {
|
459
|
boolean isMisapplied = isMisapplied(srcRs);
|
460
|
if (isMisapplied){
|
461
|
String result = srcRs.getString("nameCache");
|
462
|
result += getMisappliedAuthor(srcRs);
|
463
|
return result;
|
464
|
}else{
|
465
|
return srcRs.getString("nameTitleCache");
|
466
|
}
|
467
|
}
|
468
|
|
469
|
private String makeNomRefString(ResultSet srcRS) throws SQLException {
|
470
|
//there is no pure nomRefString field in CDM and also computing is only possible
|
471
|
//with cache strategy which requires a running CDM instance. So this is a workaround
|
472
|
//that maybe needs to be adapted
|
473
|
String result = null;
|
474
|
String fullTitle = srcRS.getString("nameFullTitleCache");
|
475
|
String nameTitleCache = srcRS.getString("nameTitleCache");
|
476
|
String nameStatus = CdmUtils.Nz(srcRS.getString("nsTitle"));
|
477
|
if (fullTitle != null && nameTitleCache != null){
|
478
|
result = fullTitle.substring(nameTitleCache.length())
|
479
|
.replaceAll("^, ", "")
|
480
|
.replaceAll("(, |^)"+nameStatus+"$", "")
|
481
|
.replaceAll("\\[as \".*\"\\]", "")
|
482
|
.replaceAll(", nom\\. cons\\., nom\\. altern\\.$", "") //single case with 2 nom. status
|
483
|
.trim();
|
484
|
}
|
485
|
return result;
|
486
|
}
|
487
|
|
488
|
private String mapTaxStatus(String dtype, String taxRelTypeUuidStr) {
|
489
|
Integer statusFk = mapTaxStatusFk(dtype, taxRelTypeUuidStr);
|
490
|
if (statusFk == null){
|
491
|
return null;
|
492
|
}else if (statusFk == PesiTransformer.T_STATUS_ACCEPTED){
|
493
|
return "accepted";
|
494
|
}else if (statusFk == PesiTransformer.T_STATUS_SYNONYM){
|
495
|
return "synonym";
|
496
|
}else if (statusFk == PesiTransformer.T_STATUS_PRO_PARTE_SYN){
|
497
|
return "pro parte synonym";
|
498
|
}else if (statusFk == PesiTransformer.T_STATUS_PARTIAL_SYN){
|
499
|
return "partial synonym";
|
500
|
}
|
501
|
return null;
|
502
|
}
|
503
|
|
504
|
private Integer mapTaxStatusFk(String dtype, String taxRelTypeUuidStr) {
|
505
|
if (dtype == null){
|
506
|
return null;
|
507
|
}else if ("Synonym".equals(dtype)){
|
508
|
return PesiTransformer.T_STATUS_SYNONYM;
|
509
|
}else if ("Taxon".equals(dtype)){
|
510
|
UUID relTypeUuid = taxRelTypeUuidStr == null? null: UUID.fromString(taxRelTypeUuidStr);
|
511
|
if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
|
512
|
return PesiTransformer.T_STATUS_PRO_PARTE_SYN;
|
513
|
}else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
|
514
|
return PesiTransformer.T_STATUS_PARTIAL_SYN;
|
515
|
}else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
|
516
|
return PesiTransformer.T_STATUS_SYNONYM; //no explicit MAN status exists in PESI
|
517
|
}else{
|
518
|
return PesiTransformer.T_STATUS_ACCEPTED;
|
519
|
}
|
520
|
}
|
521
|
return null;
|
522
|
}
|
523
|
|
524
|
private String normalizeRank(String rankStr) {
|
525
|
if (rankStr == null){return null;
|
526
|
}else if (rankStr.equals("Convar")){return "Convariety";
|
527
|
}else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
|
528
|
}else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
|
529
|
}else if (rankStr.equals("Coll. species")){return "Coll. Species";
|
530
|
}else if (rankStr.equals("Species Aggregate")){return "Aggregate";
|
531
|
}else if (rankStr.equals("Subsection bot.")){return "Subsection";
|
532
|
}return rankStr;
|
533
|
}
|
534
|
|
535
|
private String makeDisplayName(ResultSet srcRs) throws SQLException {
|
536
|
boolean isMisapplied = isMisapplied(srcRs);
|
537
|
|
538
|
String result;
|
539
|
String nameTitle = srcRs.getString("nameTitleCache");
|
540
|
String nameCache = srcRs.getString("nameCache");
|
541
|
if(!isMisapplied){
|
542
|
result = srcRs.getString("nameFullTitleCache");
|
543
|
String taggedName = getTaggedNameTitle(nameCache, nameTitle);
|
544
|
result = result.replace(nameTitle, taggedName);
|
545
|
result = result.replaceAll("^<i>"+ UTF8.HYBRID , UTF8.HYBRID+ "<i>").replaceAll(" "+ UTF8.HYBRID, "</i> "+UTF8.HYBRID+"<i>");
|
546
|
}else{
|
547
|
result = srcRs.getString("nameCache");
|
548
|
String taggedName = getTaggedNameTitle(nameCache, nameCache);
|
549
|
result = result.replace(nameCache, taggedName);
|
550
|
//misapplied
|
551
|
result += getMisappliedAuthor(srcRs);
|
552
|
}
|
553
|
String nameStatus = CdmUtils.Nz(srcRs.getString("nsTitle"));
|
554
|
result = result.replaceAll("(, |^)"+nameStatus+"$", "");
|
555
|
return result;
|
556
|
}
|
557
|
|
558
|
private boolean isMisapplied(ResultSet srcRs) throws SQLException {
|
559
|
String relTypeUuid = srcRs.getString("taxRelTypeUuid");
|
560
|
boolean isMisapplied = relTypeUuid!=null
|
561
|
&& (relTypeUuid.equals(TaxonRelationshipType.uuidMisappliedNameFor.toString())
|
562
|
|| relTypeUuid.equals(TaxonRelationshipType.uuidProParteMisappliedNameFor.toString())
|
563
|
|| relTypeUuid.equals(TaxonRelationshipType.uuidPartialMisappliedNameFor.toString()))
|
564
|
//TODO formatting of ppMANs not yet implemented
|
565
|
&& nullSafeInt(srcRs, "relToTaxonId") != null;
|
566
|
return isMisapplied;
|
567
|
}
|
568
|
|
569
|
private String getMisappliedAuthor(ResultSet srcRs) throws SQLException {
|
570
|
String result;
|
571
|
String relAppendedPhrase = srcRs.getString("tbAppendedPhrase");
|
572
|
String secId = srcRs.getString("secId");
|
573
|
String secTitle = srcRs.getString("secTitle");
|
574
|
if(relAppendedPhrase == null && secId == null) {
|
575
|
result = " auct.";
|
576
|
}else if (relAppendedPhrase != null && secId == null){
|
577
|
result = " " + relAppendedPhrase;
|
578
|
}else if (relAppendedPhrase == null && secId != null){
|
579
|
result = " sensu " + secTitle;
|
580
|
}else{
|
581
|
result = " " + relAppendedPhrase + " " + secTitle;
|
582
|
}
|
583
|
String authorship = srcRs.getString("authorshipCache");
|
584
|
if (isNotBlank(authorship)){
|
585
|
result += ", non " + authorship;
|
586
|
}
|
587
|
return result;
|
588
|
}
|
589
|
|
590
|
private String getTaggedNameTitle(String nameCache, String nameTitle) {
|
591
|
if (nameCache == null){
|
592
|
logger.warn("NameCache is null");
|
593
|
return nameTitle;
|
594
|
}
|
595
|
String result = null;
|
596
|
try {
|
597
|
String[] nameCacheSplit = nameCache.split(" ");
|
598
|
String[] nameTitleSplit = nameTitle.split(" ");
|
599
|
result = "";
|
600
|
boolean currentIsName = false;
|
601
|
for (int i=0, j=0; j < nameTitleSplit.length; j++){
|
602
|
if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
|
603
|
&& !isMarker(nameCacheSplit[i])){
|
604
|
if(!currentIsName){
|
605
|
result += " <i>" + nameCacheSplit[i];
|
606
|
currentIsName = true;
|
607
|
}else{
|
608
|
result += " " + nameCacheSplit[i];
|
609
|
}
|
610
|
if((j+1)==nameTitleSplit.length){
|
611
|
result += "</i>";
|
612
|
}
|
613
|
i++;
|
614
|
}else{
|
615
|
if(currentIsName){
|
616
|
result += "</i>";
|
617
|
currentIsName = false;
|
618
|
}
|
619
|
result += " " + nameTitleSplit[j];
|
620
|
if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
|
621
|
&& isMarker(nameCacheSplit[i])){
|
622
|
i++;
|
623
|
}
|
624
|
}
|
625
|
}
|
626
|
return result.trim();
|
627
|
} catch (Exception e) {
|
628
|
e.printStackTrace();
|
629
|
return result;
|
630
|
}
|
631
|
}
|
632
|
|
633
|
private boolean isMarker(String nameCacheSplit) {
|
634
|
return nameCacheSplit.endsWith(".") || nameCacheSplit.equals("[unranked]")
|
635
|
|| nameCacheSplit.equals("grex")|| nameCacheSplit.equals("proles")
|
636
|
|| nameCacheSplit.equals("race");
|
637
|
}
|
638
|
|
639
|
private boolean testSingleTaxonRelations(int n) throws SQLException {
|
640
|
boolean success = true;
|
641
|
ResultSet srcRS = source.getResultSet(""
|
642
|
+ " SELECT t.name_id tid, pt.name_id pid "
|
643
|
+ " FROM TaxonNode tn "
|
644
|
+ " INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
|
645
|
+ " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
|
646
|
+ " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
|
647
|
+ " WHERE t.publish = 1 && pt.publish = 1 "
|
648
|
+ " ORDER BY CAST(t.name_id as char(20)) ");
|
649
|
|
650
|
ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
|
651
|
+ " FROM RelTaxon rel "
|
652
|
+ " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
|
653
|
+ " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
|
654
|
+ " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk = 101 "
|
655
|
+ " ORDER BY t1.IdInSource");
|
656
|
int i = 0;
|
657
|
while (srcRS.next() && destRS.next()){
|
658
|
success &= testSingleTaxonRelation(srcRS, destRS);
|
659
|
i++;
|
660
|
}
|
661
|
success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
|
662
|
return success;
|
663
|
}
|
664
|
|
665
|
private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
666
|
String id = String.valueOf(srcRS.getInt("tid"));
|
667
|
boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("tid"), destRS.getString("t1Id"), id);
|
668
|
success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("pid"), destRS.getString("t2Id"), id);
|
669
|
success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_TAXONOMICALLY_INCLUDED_IN, destRS.getInt("RelTaxonQualifierFk"), id);
|
670
|
success &= equals("Taxon relation qualifier cache", "is taxonomically included in", destRS.getString("RelQualifierCache"), id);
|
671
|
//TODO enable after next import
|
672
|
success &= isNull("notes", destRS, id);
|
673
|
//complete if no further relations need to added
|
674
|
return success;
|
675
|
}
|
676
|
|
677
|
private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
|
678
|
boolean success = true;
|
679
|
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
|
680
|
+ " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
|
681
|
+ " LEFT JOIN sources s ON s.id = MN.source_id "
|
682
|
+ " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
|
683
|
+ " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext
|
684
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
|
685
|
+ " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
|
686
|
+ " INNER JOIN Source s ON s.SourceId = ats.SourceFk "
|
687
|
+ " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
|
688
|
+ " WHERE t."+origEuroMed
|
689
|
+ " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
|
690
|
int count = 0;
|
691
|
while (srcRs.next() && destRs.next()){
|
692
|
success &= testSingleAdditionalTaxonSource(srcRs, destRs);
|
693
|
count++;
|
694
|
}
|
695
|
success &= equals("Notes count differs", n, count, "-1");
|
696
|
return success;
|
697
|
}
|
698
|
|
699
|
private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
700
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
|
701
|
boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
702
|
success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id); //currently we use the same id in ERMS and PESI
|
703
|
success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
|
704
|
success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
|
705
|
//TODO some records are still truncated ~ >820 characters
|
706
|
success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
|
707
|
success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
|
708
|
//Complete
|
709
|
return success;
|
710
|
}
|
711
|
|
712
|
private boolean testSingleNotes(int n) throws SQLException {
|
713
|
boolean success = true;
|
714
|
ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
|
715
|
+ " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
|
716
|
+ " LEFT JOIN languages l ON l.LanID = no.lan_id "
|
717
|
+ " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable "); //, no.note (not possible because ntext
|
718
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
|
719
|
+ " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
|
720
|
+ " LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
|
721
|
+ " LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
|
722
|
+ " WHERE t." + origEuroMed
|
723
|
+ " AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
|
724
|
+ " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1 ");
|
725
|
int count = 0;
|
726
|
while (srcRs.next() && destRs.next()){
|
727
|
success &= testSingleNote(srcRs, destRs);
|
728
|
count++;
|
729
|
}
|
730
|
success &= equals("Notes count differs", n, count, "-1");
|
731
|
return success;
|
732
|
}
|
733
|
|
734
|
private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
735
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
|
736
|
boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
737
|
success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
|
738
|
success &= isNull("Note_2", destRs, id);
|
739
|
success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
|
740
|
success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
|
741
|
success &= isNull("Region", destRs, id);
|
742
|
success &= isNull("SpeciesExpertGUID", destRs, id);
|
743
|
//SpeciesExpertName, LastAction, LastActionDate handled in separate method
|
744
|
//complete
|
745
|
return success;
|
746
|
}
|
747
|
|
748
|
private String normalizeNoteCatCache(String string) {
|
749
|
return StringUtils.capitalize(string)
|
750
|
.replace("Original Combination", "Original combination")
|
751
|
.replace("Taxonomic remark", "Taxonomic Remark");
|
752
|
}
|
753
|
|
754
|
private boolean testSingleDistributions(int n) throws SQLException {
|
755
|
boolean success = true;
|
756
|
ResultSet srcRs = source.getResultSet(
|
757
|
" SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
|
758
|
+ " CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
|
759
|
+ " CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
|
760
|
+ " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
|
761
|
+ " LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
|
762
|
+ " LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
|
763
|
+ " LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
|
764
|
+ distributionCountWhere
|
765
|
+ " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
|
766
|
ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
|
767
|
+ " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
|
768
|
+ " LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
|
769
|
+ " WHERE t." + origEuroMed
|
770
|
+ " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
|
771
|
int count = 0;
|
772
|
while (srcRs.next() && destRs.next()){
|
773
|
success &= testSingleDistribution(srcRs, destRs);
|
774
|
count++;
|
775
|
}
|
776
|
success &= equals("Distribution count differs", n, count, "-1");
|
777
|
return success;
|
778
|
}
|
779
|
|
780
|
private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
781
|
String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
|
782
|
boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
|
783
|
success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
|
784
|
// success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
|
785
|
success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
|
786
|
success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
|
787
|
//TODO success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
|
788
|
success &= isNull("SourceFk", destRs, id); //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
|
789
|
success &= isNull("SourceCache", destRs, id); //sources should be moved to extra table, see above
|
790
|
//TODO success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
|
791
|
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in EM and according to script
|
792
|
success &= isNull("SpeciesExpertName", destRs, id); //SpeciesExpertName does not exist in EM and according to script
|
793
|
success &= equals("Distribution Last Action", srcRs.getString("lastAction"), destRs.getString("LastAction"), id);
|
794
|
success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"), destRs.getTimestamp("LastActionDate"), id);
|
795
|
return success;
|
796
|
}
|
797
|
|
798
|
/**
|
799
|
* @param string
|
800
|
* @return
|
801
|
*/
|
802
|
private Integer mapStatus(String uuidStr) {
|
803
|
UUID uuid = UUID.fromString(uuidStr);
|
804
|
if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){ //native, reported in error
|
805
|
return PesiTransformer.STATUS_ABSENT;
|
806
|
}else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducedAdventitious) //casual, introduced adventitious
|
807
|
|| uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
|
808
|
|| uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
|
809
|
return PesiTransformer.STATUS_INTRODUCED;
|
810
|
}else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){ //native
|
811
|
return PesiTransformer.STATUS_NATIVE;
|
812
|
}else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){ //naturalised
|
813
|
return PesiTransformer.STATUS_NATURALISED;
|
814
|
}else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){ //native, presence questionable
|
815
|
return PesiTransformer.STATUS_DOUBTFUL;
|
816
|
}else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){ //cultivated
|
817
|
return PesiTransformer.STATUS_MANAGED;
|
818
|
}else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){ //native, reported in error
|
819
|
return -1;
|
820
|
}
|
821
|
|
822
|
return null;
|
823
|
}
|
824
|
|
825
|
private String normalizeDistrArea(String area) {
|
826
|
if (area == null){
|
827
|
return null;
|
828
|
}else if ("France".equals(area)){return "French mainland";
|
829
|
}else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
|
830
|
}else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
|
831
|
}else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
|
832
|
}else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
|
833
|
}else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
|
834
|
}else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
|
835
|
}else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
|
836
|
}else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
|
837
|
}else if ("Canary Is.".equals(area)){return "Canary Islands";
|
838
|
}else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
|
839
|
}else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
|
840
|
}else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
|
841
|
}else if ("Portugal".equals(area)){return "Portuguese mainland";
|
842
|
}else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
|
843
|
}else if ("Norway".equals(area)){return "Norwegian mainland";
|
844
|
}else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
|
845
|
}else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
|
846
|
}else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
|
847
|
}else if ("Ireland".equals(area)){return "Republic of Ireland";
|
848
|
}else if ("France".equals(area)){return "French mainland";
|
849
|
}
|
850
|
return area;
|
851
|
}
|
852
|
|
853
|
private boolean testSingleCommonNames(int n) throws SQLException {
|
854
|
boolean success = true;
|
855
|
ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
|
856
|
+ " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
|
857
|
+ " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
|
858
|
ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
|
859
|
+ " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
|
860
|
+ " WHERE t." + origEuroMed
|
861
|
+ " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate "); //sorting also lastActionDate results in a minimum of exact duplicate problems
|
862
|
int count = 0;
|
863
|
while (srcRs.next() && destRs.next()){
|
864
|
success &= testSingleCommonName(srcRs, destRs);
|
865
|
count++;
|
866
|
}
|
867
|
success &= equals("Common name count differs", n, count, "-1");
|
868
|
return success;
|
869
|
}
|
870
|
|
871
|
boolean prefer639_3 = true;
|
872
|
String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
|
873
|
String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
|
874
|
|
875
|
private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
|
876
|
String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
|
877
|
boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
|
878
|
success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
|
879
|
success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
|
880
|
success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
|
881
|
//TODO cn lan_id needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
|
882
|
success &= isNull("Region", destRs, id); //region does not seem to exist in ERMS
|
883
|
//TODO cn sources, see comments
|
884
|
// success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014
|
885
|
// success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014
|
886
|
success &= isNull("SpeciesExpertGUID", destRs, id); //SpeciesExpertGUID does not exist in ERMS
|
887
|
//SpeciesExpertName,LastAction,LastActionDate handled in separate method
|
888
|
//complete
|
889
|
return success;
|
890
|
}
|
891
|
|
892
|
private String normalizeLang(String string) {
|
893
|
if ("Spanish".equals(string)){
|
894
|
return "Spanish, Castillian";
|
895
|
}else if ("Modern Greek (1453-)".equals(string)){
|
896
|
return "Greek";
|
897
|
}else if ("Malay (individual language)".equals(string)){
|
898
|
return "Malay";
|
899
|
}else if ("Swahili (individual language)".equals(string)){
|
900
|
return "Swahili";
|
901
|
}
|
902
|
|
903
|
return string;
|
904
|
}
|
905
|
|
906
|
private String getLanguageIso(ResultSet destRs) throws SQLException {
|
907
|
String result = destRs.getString(preferredISO639);
|
908
|
if (result == null){
|
909
|
result = destRs.getString(alternativeISO639);
|
910
|
}
|
911
|
return result;
|
912
|
}
|
913
|
|
914
|
private boolean testSingleReferences(int count) throws SQLException {
|
915
|
boolean success = true;
|
916
|
ResultSet srcRS = source.getResultSet("SELECT r.*, a.titleCache author "
|
917
|
+ " FROM Reference r LEFT OUTER JOIN AgentBase a ON r.authorship_id = a.id "
|
918
|
+ " ORDER BY r.id ");
|
919
|
ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
|
920
|
+ " WHERE s." + origEuroMed
|
921
|
+ " ORDER BY s.RefIdInSource "); // +1 for the source reference "erms" but this has no OriginalDB
|
922
|
int i = 0;
|
923
|
while (srcRS.next() && destRS.next()){
|
924
|
success &= testSingleReference(srcRS, destRS);
|
925
|
i++;
|
926
|
}
|
927
|
success &= equals("References count differs", count, i, "-1");
|
928
|
return success;
|
929
|
}
|
930
|
|
931
|
private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
|
932
|
String id = String.valueOf(srcRS.getInt("id"));
|
933
|
boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
|
934
|
success &= isNull("IMIS_Id", destRS, id); //for E+M no IMIS id exists
|
935
|
success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("refType")), destRS.getInt("SourceCategoryFk"), id);
|
936
|
success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("refType")), destRS.getString("SourceCategoryCache"), id);
|
937
|
success &= equals("Reference name ", srcRS.getString("titleCache"), destRS.getString("Name"), id);
|
938
|
success &= equals("Reference abstract ", srcRS.getString("referenceAbstract"), destRS.getString("Abstract"), id);
|
939
|
success &= equals("Reference title ", srcRS.getString("title"), destRS.getString("Title"), id);
|
940
|
success &= equals("Reference author string ", srcRS.getString("author"), destRS.getString("AuthorString"), id);
|
941
|
//TODO reference year
|
942
|
success &= equals("Reference year ", normalizeYear(srcRS), destRS.getString("RefYear"), id);
|
943
|
//FIXME reference nomrefcache
|
944
|
// success &= equals("Reference NomRefCache ", srcRS.getString("abbrevTitleCache"), destRS.getString("NomRefCache"), id);
|
945
|
success &= equals("Reference DOI ", srcRS.getString("doi"), destRS.getString("Doi"), id);
|
946
|
success &= equals("Reference link ", srcRS.getString("uri"), destRS.getString("Link"), id);
|
947
|
//TODO reference Notes
|
948
|
// success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
|
949
|
//complete
|
950
|
return success;
|
951
|
}
|
952
|
|
953
|
private Integer convertSourceTypeFk(String sourceType) {
|
954
|
if (sourceType == null){
|
955
|
return null;
|
956
|
}else if ("DB".equals(sourceType)){
|
957
|
return PesiTransformer.REF_DATABASE;
|
958
|
}else if ("JOU".equals(sourceType)){
|
959
|
return PesiTransformer.REF_JOURNAL;
|
960
|
}else if ("BK".equals(sourceType)){
|
961
|
return PesiTransformer.REF_BOOK;
|
962
|
}else if ("GEN".equals(sourceType)){
|
963
|
return PesiTransformer.REF_UNRESOLVED;
|
964
|
}else if ("SER".equals(sourceType)){
|
965
|
// TODO correct?
|
966
|
return PesiTransformer.REF_UNRESOLVED;
|
967
|
}
|
968
|
return null;
|
969
|
}
|
970
|
private String convertSourceTypeCache(String sourceType) {
|
971
|
if (sourceType == null){
|
972
|
return null;
|
973
|
}else if ("DB".equals(sourceType)){
|
974
|
return "database";
|
975
|
}else if ("JOU".equals(sourceType)){
|
976
|
return "journal";
|
977
|
}else if ("BK".equals(sourceType)){
|
978
|
return "book";
|
979
|
}else if ("SER".equals(sourceType)){
|
980
|
return "published";
|
981
|
}else if ("BK".equals(sourceType)){
|
982
|
return "book";
|
983
|
}else if ("GEN".equals(sourceType)){
|
984
|
return "unresolved";
|
985
|
}
|
986
|
return null;
|
987
|
}
|
988
|
|
989
|
private boolean testReferenceCount() {
|
990
|
int countSrc = source.getUniqueInteger(countReferencesStr);
|
991
|
int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origEuroMed); // +1 for the source reference "erms" but this has no OriginalDB
|
992
|
boolean success = equals("Reference count ", countSrc, countDest, "-1");
|
993
|
return success;
|
994
|
}
|
995
|
|
996
|
private String normalizeYear(ResultSet rs) throws SQLException {
|
997
|
String freetext = rs.getString("datePublished_freetext");
|
998
|
if(StringUtils.isNotBlank(freetext)){
|
999
|
return freetext;
|
1000
|
}
|
1001
|
String start = rs.getString("datePublished_start");
|
1002
|
String end = rs.getString("datePublished_end");
|
1003
|
if (start != null){
|
1004
|
start = start.substring(0,4);
|
1005
|
}
|
1006
|
if (end != null){
|
1007
|
end = end.substring(0,4);
|
1008
|
}
|
1009
|
String result = start == null? null: start + (end==null? "": "-"+ end);
|
1010
|
return result;
|
1011
|
}
|
1012
|
|
1013
|
private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
|
1014
|
if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
|
1015
|
LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
|
1016
|
LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
|
1017
|
if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
|
1018
|
logger.info(messageStart + " were (almost) equal: " + srcDate);
|
1019
|
return true;
|
1020
|
}else{
|
1021
|
String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+ srcDate + "; Destination: " + destDate;
|
1022
|
logger.warn(message);
|
1023
|
return false;
|
1024
|
}
|
1025
|
}else{
|
1026
|
logger.info(messageStart + " were equal: " + srcDate);
|
1027
|
return true;
|
1028
|
}
|
1029
|
}
|
1030
|
|
1031
|
private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
|
1032
|
String strId = id.equals("-1")? "": (id+ ": ");
|
1033
|
if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
|
1034
|
String message = strId+ messageStart + " must be equal, but was not.\n Source: "+ nSrc + "; Destination: " + nDest;
|
1035
|
logger.warn(message);
|
1036
|
return false;
|
1037
|
}else{
|
1038
|
logger.info(strId + messageStart + " were equal: " + nSrc);
|
1039
|
return true;
|
1040
|
}
|
1041
|
}
|
1042
|
|
1043
|
//** ************* MAIN ********************************************/
|
1044
|
|
1045
|
public static void main(String[] args){
|
1046
|
PesiEuroMedValidator validator = new PesiEuroMedValidator();
|
1047
|
validator.invoke(new Source(defaultSource), defaultDestination);
|
1048
|
System.exit(0);
|
1049
|
}
|
1050
|
}
|