ref #8577 cleanup E+M PESI validation
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / euromed / PesiEuroMedValidator.java
1 /**
2 * Copyright (C) 2019 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.pesi.euromed;
10
11 import java.sql.ResultSet;
12 import java.sql.SQLException;
13 import java.sql.Timestamp;
14 import java.time.LocalDate;
15 import java.util.UUID;
16
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19
20 import eu.etaxonomy.cdm.app.common.CdmDestinations;
21 import eu.etaxonomy.cdm.app.pesi.PesiDestinations;
22 import eu.etaxonomy.cdm.common.CdmUtils;
23 import eu.etaxonomy.cdm.database.ICdmDataSource;
24 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
25 import eu.etaxonomy.cdm.io.common.Source;
26 import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
27 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
28 import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
29
30 /**
31 * Tests the ERMS -> PESI pipeline by comparing the source DB with destination PESI DB.
32 *
33 * @author a.mueller
34 * @since 01.09.2019
35 */
36 public class PesiEuroMedValidator {
37
38 private static final Logger logger = Logger.getLogger(PesiEuroMedValidator.class);
39
40 private static final ICdmDataSource defaultSource = CdmDestinations.cdm_test_local_mysql_euromed();
41 // private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI();
42 private static final Source defaultDestination = PesiDestinations.pesi_test_local_CDM_EM2PESI_2();
43
44 boolean doReferences = false;
45 boolean doTaxa = true;
46 boolean doTaxRels = false;
47 boolean doDistributions = false;
48 boolean doCommonNames = false;
49 boolean doNotes = false;
50 boolean doAdditionalTaxonSources = false;
51
52 private Source source = new Source(defaultSource);
53 private Source destination = defaultDestination;
54
55 private String origEuroMed = "OriginalDB = 'E+M' ";
56
57 public void invoke(Source source, Source destination){
58 logger.warn("Validate destination " + destination.getDatabase());
59 boolean success = true;
60 try {
61 this.source = source;
62 this.destination = destination;
63 success &= testReferences();
64 success &= testTaxa();
65 success &= testTaxonRelations();
66 success &= testDistributions();
67 success &= testCommonNames();
68 success &= testNotes();
69 success &= testAdditionalTaxonSources();
70 } catch (Exception e) {
71 e.printStackTrace();
72 success = false;
73 }
74 //TBC
75 System.out.println("end validation " + (success? "":"NOT ") + "successful.");
76 }
77
78 private boolean testAdditionalTaxonSources() throws SQLException {
79 if (!doAdditionalTaxonSources){
80 return true;
81 }
82 System.out.println("Start validate additional taxon sources");
83 boolean success = testAdditionalTaxonSourcesCount();
84 if (success){
85 success &= testSingleAdditionalTaxonSources(source.getUniqueInteger(countAddtionalTaxonSource));
86 }
87 return success;
88 }
89
90 private boolean testNotes() throws SQLException {
91 if (!doNotes){
92 return true;
93 }
94 System.out.println("Start validate notes");
95 boolean success = testNotesCount();
96 if (success){
97 success &= testSingleNotes(source.getUniqueInteger("SELECT count(*) FROM notes "));
98 }
99 return success;
100 }
101
102 private boolean testDistributions() throws SQLException {
103 if (!doDistributions){
104 return true;
105 }
106 System.out.println("Start validate distributions");
107 boolean success = testDistributionCount();
108 if (!success){
109 success &= testSingleDistributions(source.getUniqueInteger(distributionCountSQL));
110 }
111 return success;
112 }
113
114 private boolean testCommonNames() throws SQLException {
115 if (!doCommonNames){
116 return true;
117 }
118 System.out.println("Start validate common names");
119 boolean success = testCommonNameCount();
120 if (success){
121 success &= testSingleCommonNames(source.getUniqueInteger("SELECT count(*) FROM vernaculars "));
122 }
123 return success;
124 }
125
126 int countSynonyms;
127 int countIncludedIns;
128 private boolean testTaxonRelations() throws SQLException {
129 if (!doTaxRels){
130 return true;
131 }
132 System.out.println("Start validate taxon relations");
133 boolean success = testSynonymRelations();
134 success &= testIncludedInRelations();
135 success &= testTotalRelations();
136 success &= testNameRelations();
137 return success;
138 }
139
140 private boolean testTotalRelations() {
141 if (!(countSynonyms < 0 || countIncludedIns < 0)){
142 int countTotalSrc = countSynonyms + countIncludedIns;
143 int countSrc = source.getUniqueInteger("SELECT count(*) FROM tu ");
144 boolean success = equals("Taxrel count + 1 must be same as source taxon count ", countTotalSrc+1, countSrc, String.valueOf(-1));
145 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t."+ origEuroMed);
146 success &= equals("Taxrel count + 1 must be same as destination taxon count ", countTotalSrc+1, countDest, String.valueOf(-1));
147 return success;
148 }else{
149 return false;
150 }
151 }
152
153 private final String countSynonymRelation = "SELECT count(*) FROM TaxonBase syn LEFT JOIN TaxonBase acc ON syn.acceptedTaxon_id = acc.id WHERE syn.publish = 1 AND acc.publish = 1 ";
154 private boolean testSynonymRelations() throws SQLException {
155
156 int countSrc = source.getUniqueInteger(countSynonymRelation);
157 int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk > 101");
158 boolean success = equals("Synonym count ", countSrc, countDest, String.valueOf(-1));
159 if (success){
160 //TODO test single synonym relations
161 success &= testSingleSynonymRelations(source.getUniqueInteger(countSynonymRelation));
162 }
163 countSynonyms = (countSrc == countDest)? countSrc : -1;
164 return success;
165 }
166
167 private boolean testSingleSynonymRelations(int n) throws SQLException {
168 boolean success = true;
169 ResultSet srcRS = source.getResultSet(""
170 + " SELECT t.id tid, pt.id pid "
171 + " FROM TaxonNode tn "
172 + " INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
173 + " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
174 + " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
175 + " WHERE t.publish = 1 && pt.publish = 1 "
176 + " ORDER BY CAST(tb.id as char(20)) ");
177
178 ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
179 + " FROM RelTaxon rel "
180 + " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
181 + " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
182 + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk > 101 "
183 + " ORDER BY t1.IdInSource");
184 int i = 0;
185 while (srcRS.next() && destRS.next()){
186 success &= testSingleSynonymRelation(srcRS, destRS);
187 i++;
188 }
189 success &= equals("Synonym relation count for single compare", n, i, String.valueOf(-1));
190 return success;
191 }
192
193 private boolean testSingleSynonymRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
194 String id = String.valueOf(srcRS.getInt("id"));
195 boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("id"), destRS.getString("t1Id"), id);
196 success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("tu_acctaxon"), destRS.getString("t2Id"), id);
197 success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_SYNONYM_OF, destRS.getInt("RelTaxonQualifierFk"), id);
198 success &= equals("Taxon relation qualifier cache", "is synonym of", destRS.getString("RelQualifierCache"), id);
199 //TODO enable after next import
200 // success &= isNull("notes", destRS);
201 //complete if no further relations need to added
202 return success;
203 }
204
205 private boolean testNameRelations() {
206 //Name relations
207 int countSrc = source.getUniqueInteger("SELECT count(*) FROM NameRelationship WHERE ("
208 + " 1=1 "
209 + ")");
210 int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk <100 ");
211 boolean success = equals("Taxon name relation count ", countSrc, countDest, String.valueOf(-1));
212 if (success){
213 //TODO test single name relation
214 // success &= testSingleNameRelations(source.getUniqueInteger(countSynonymRelation));
215 }
216 return success;
217 }
218
219 private final String countParentRelation = "SELECT count(*) "
220 + " FROM TaxonNode tn "
221 + " INNER JOIN TaxonBase tb ON tn.taxon_id = tb.id "
222 + " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
223 + " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
224 + " WHERE tb.publish = 1 && pt.publish = 1 ";
225
226 private boolean testIncludedInRelations() throws SQLException {
227 int countSrc = source.getUniqueInteger(countParentRelation);
228 int countDest = destination.getUniqueInteger("SELECT count(*) FROM RelTaxon WHERE RelTaxonQualifierFk = 101 ");
229 boolean success = equals("Tax included in count ", countSrc, countDest, String.valueOf(-1));
230 if (success){
231 success &= testSingleTaxonRelations(source.getUniqueInteger(countParentRelation));
232 }
233 countIncludedIns = (countSrc == countDest)? countSrc : -1;
234 return success;
235 }
236
237 private boolean testTaxa() throws SQLException {
238 if (!doTaxa){
239 return true;
240 }
241 System.out.println("Start validate taxa");
242 boolean success = testTaxaCount();
243 if (success){
244 success &= testSingleTaxa(source.getUniqueInteger(countTaxon));
245 }
246 return success;
247 }
248
249 String countReferencesStr = "SELECT count(*) FROM reference ";
250 private boolean testReferences() throws SQLException {
251 if (!doReferences){
252 return true;
253 }
254 System.out.println("Start validate references");
255 boolean success = testReferenceCount();
256 if (success){
257 success &= testSingleReferences(source.getUniqueInteger(countReferencesStr));
258 }
259 return success;
260 }
261
262 private final String countAddtionalTaxonSource = "SELECT count(*) FROM tu_sources ts ";
263 private boolean testAdditionalTaxonSourcesCount() {
264 int countSrc = source.getUniqueInteger(countAddtionalTaxonSource);
265 int countDest = destination.getUniqueInteger("SELECT count(*) FROM AdditionalTaxonSource ");
266 return equals("AdditionalTaxonSource count ", countSrc, countDest, String.valueOf(-1));
267 }
268
269 private boolean testNotesCount() {
270 int countSrc = source.getUniqueInteger("SELECT count(*) FROM notes ");
271 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Note "
272 + " WHERE (1=1) ");
273 boolean result = equals("Notes count ", countSrc, countDest, String.valueOf(-1));
274
275 return result;
276 }
277
278 private String distributionCountWhere = " WHERE deb.DTYPE = 'Distribution' AND tb.publish = 1 AND a.uuid NOT IN ("
279 + "'111bdf38-7a32-440a-9808-8af1c9e54b51'," //E+M
280 //Former UUSR
281 + "'c4a898ce-0f32-44fe-a8a3-278e11a4ba53','a575d608-dd53-4c01-b2af-5067d0711f64','da4e9cc3-b1cc-403a-81ff-bcc5d9fadbd1',"
282 + "'7e0f8fa3-5db9-48f0-9fa8-87fcab3eaa53','2188e3a5-0446-47c8-b11b-b4b2b9a71c75','44f262e3-5091-4d28-8081-440d3978fb0b',"
283 + "'efabc8fd-0b3c-475b-b532-e1ca0ba0bdbb') ";
284 private String distributionCountSQL = "SELECT count(*) as n "
285 + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
286 + " LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
287 + " LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
288 + distributionCountWhere;
289 private boolean testDistributionCount() {
290 int countSrc = source.getUniqueInteger(distributionCountSQL);
291 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Occurrence ");
292 return equals("Occurrence count ", countSrc, countDest, String.valueOf(-1));
293 }
294
295 private boolean testCommonNameCount() {
296 int countSrc = source.getUniqueInteger("SELECT count(*) FROM vernaculars ");
297 int countDest = destination.getUniqueInteger("SELECT count(*) FROM CommonName ");
298 return equals("CommonName count ", countSrc, countDest, String.valueOf(-1));
299 }
300
301 private final String countTaxon = "SELECT count(*) FROM TaxonBase tb WHERE tb.publish = 1 ";
302 private boolean testTaxaCount() {
303 int countSrc = source.getUniqueInteger(countTaxon);
304 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Taxon t WHERE t.SourceFk IS NOT NULL OR t.AuthorString = 'auct.' ");
305 boolean result = equals("Taxon count ", countSrc, countDest, String.valueOf(-1));
306 return result;
307 }
308
309 private boolean testSingleTaxa(int n) throws SQLException {
310 boolean success = true;
311 ResultSet srcRS = source.getResultSet("SELECT CAST(tn.id as char(20)) tid, tb.uuid as GUID, pt.id parentId, "
312 + " tn.rank_id, rank.titleCache rank_name, "
313 + " sec.titleCache secTitle,"
314 + " tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
315 + " tn.nameCache, tn.authorshipCache, tn.titleCache nameTitleCache, tn.fullTitleCache nameFullTitleCache, "
316 + " tb.DTYPE taxStatus, taxRelType.uuid taxRelTypeUuid, nsType.id nsId, nsType.idInVocabulary nsTitle, "
317 + " typeName_id, typeName.titleCache typeFullNameCache, "
318 + " CASE WHEN tb.updated IS NOT NULL THEN tb.updated ELSE tb.created END as lastActionDate, "
319 + " CASE WHEN tb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
320 + " FROM TaxonBase tb "
321 + " LEFT JOIN TaxonName tn on tb.name_id = tn.id "
322 + " LEFT JOIN DefinedTermBase rank ON rank.id = tn.rank_id "
323 + " LEFT JOIN Reference sec ON sec.id = tb.sec_id "
324 + " LEFT JOIN TaxonName_NomenclaturalStatus nsMN ON tn.id = nsMN.TaxonName_id "
325 + " LEFT JOIN NomenclaturalStatus ns ON ns.id = nsMN.status_id "
326 + " LEFT JOIN DefinedTermBase nsType ON nsType.id = ns.type_id "
327 + " LEFT JOIN TaxonName_TypeDesignationBase typeMN ON typeMN.TaxonName_id = tn.id "
328 + " LEFT JOIN TypeDesignationBase td ON td.id = typeMN.typedesignations_id "
329 + " LEFT JOIN TaxonName typeName ON typeName.id = td.typeName_id "
330 + " LEFT JOIN TaxonNode n ON n.taxon_id = tb.id "
331 + " LEFT JOIN TaxonNode ptn ON n.parent_id = ptn.id "
332 + " LEFT JOIN TaxonBase pt ON pt.id = ptn.taxon_id AND pt.publish = 1 "
333 + " LEFT JOIN TaxonRelationship tr ON tr.relatedFrom_id = tb.id "
334 + " LEFT JOIN DefinedTermBase taxRelType ON taxRelType.id = tr.type_id"
335 + " WHERE tb.publish = 1 "
336 + " GROUP BY tid, GUID, tn.rank_id, rank.titleCache, secTitle,"
337 + " tn.genusOrUninomial, tn.infraGenericEpithet, tn.specificEpithet, tn.infraSpecificEpithet, "
338 + " tn.nameCache, tn.authorshipCache, tn.titleCache, "
339 + " tb.DTYPE, tb.updated, tb.created " //for duplicates caused by >1 name status
340 + " ORDER BY tid, GUID, lastActionDate ");
341 ResultSet destRS = destination.getResultSet("SELECT t.*, "
342 + " pt.IdInSource parentSourceId, " //not needed
343 + " s.Name as sourceName, type.IdInSource typeSourceId, r.Rank "
344 + " FROM Taxon t "
345 + " LEFT JOIN Taxon pt ON pt.TaxonId = t.ParentTaxonFk "
346 + " LEFT JOIN Taxon type ON type.TaxonId = t.TypeNameFk "
347 + " LEFT JOIN Rank r ON r.RankId = t.RankFk AND r.KingdomId = t.KingdomFk "
348 + " LEFT JOIN Source s ON s.SourceId = t.SourceFk "
349 + " WHERE t."+ origEuroMed + " AND (t.SourceFk IS NOT NULL OR t.AuthorString = 'auct.') " //FIXME remove SourceFk filter is only preliminary for first check
350 + " ORDER BY t.IdInSource, t.GUID, t.LastActionDate, AuthorString ");
351 int i = 0;
352 logger.error("remove SourceFk filter is only preliminary for first check");
353 while (srcRS.next() && destRS.next()){
354 success &= testSingleTaxon(srcRS, destRS);
355 i++;
356 }
357 success &= equals("Taxon count for single compare", n, i, String.valueOf(-1));
358 return success;
359 }
360
361 private boolean testSingleTaxon(ResultSet srcRS, ResultSet destRS) throws SQLException {
362 String id = String.valueOf(srcRS.getInt("tid"));
363 //TODO decide, according to SQL it also contains the taxon UUID, but in PESI2014 backup I can't find this
364 boolean success = equals("Taxon ID", "NameId: " + srcRS.getInt("tid"), destRS.getString("IdInSource"), id);
365 success &= equals("Taxon source", srcRS.getString("secTitle"), destRS.getString("sourceName"), id);
366
367 success &= equals("Taxon kingdomFk", "3", destRS.getString("KingdomFk"), id);
368 //difficult to test success &= equals("Taxon rank fk", srcRS.getString("rank_id"), destRS.getString("RankFk"), id);
369 success &= equals("Taxon rank cache", normalizeRank(srcRS.getString("rank_name")), destRS.getString("Rank"), id);
370 success &= equals("Taxon genusOrUninomial", srcRS.getString("genusOrUninomial"), destRS.getString("GenusOrUninomial"), id) ;
371 success &= equals("Taxon infraGenericEpithet", srcRS.getString("infraGenericEpithet"), destRS.getString("InfraGenericEpithet"), id) ;
372 success &= equals("Taxon specificEpithet", srcRS.getString("specificEpithet"), destRS.getString("SpecificEpithet"), id) ;
373 success &= equals("Taxon infraSpecificEpithet", srcRS.getString("infraSpecificEpithet"), destRS.getString("InfraSpecificEpithet"), id) ;
374
375 success &= equals("Taxon websearchname", srcRS.getString("nameCache"), destRS.getString("WebSearchName"), id);
376 //TODO success &= equals("Taxon WebShowName", srcRS.getString("tu_displayname"), destRS.getString("WebShowName"), id);
377 //FIXME sensu+auct. autoren success &= equals("Taxon authority", srcRS.getString("authorshipCache"), destRS.getString("AuthorString"), id);
378 //FIXME sensu+auct. autoren success &= equals("Taxon FullName", srcRS.getString("nameTitleCache"), destRS.getString("FullName"), id);
379 success &= equals("Taxon NomRefString", makeNomRefString(srcRS), destRS.getString("NomRefString"), id);
380 success &= equals("Taxon DisplayName", makeDisplayName(srcRS), destRS.getString("DisplayName"), id); //in ERMS according to SQL script same as FullName, no nom.ref. information attached
381 //difficult to test success &= equals("Taxon NameStatusFk", nullSafeInt(srcRS, "nsId"),nullSafeInt( destRS,"NameStatusFk"), id);
382 success &= equals("Taxon NameStatusCache", srcRS.getString("nsTitle"), destRS.getString("NameStatusCache"), id);
383
384 //reimport success &= equals("Taxon TaxonStatusFk", mapTaxStatusFk(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), nullSafeInt( destRS,"TaxonStatusFk"), id);
385 //reimport success &= equals("Taxon TaxonStatusCache", mapTaxStatus(srcRS.getString("taxStatus"), srcRS.getString("taxRelTypeUuid")), destRS.getString("TaxonStatusCache"), id);
386
387 success &= equals("Taxon ParentTaxonFk", nullSafeInt(srcRS, "parentId"), nullSafeInt(destRS, "ParentTaxonFk"), id);
388
389 Integer origTypeNameFk = nullSafeInt(srcRS, "typeName_id");
390 success &= equals("Taxon TypeNameFk", origTypeNameFk == null? null : "NameId: " + origTypeNameFk, destRS.getString("typeSourceId"), id);
391 success &= equals("Taxon TypeFullNameCache", srcRS.getString("typeFullNameCache"), destRS.getString("TypeFullNameCache"), id);
392 //according to SQL always constant, could be changed in future
393 success &= equals("Taxon QualityStatusFK", 2, nullSafeInt( destRS,"QualityStatusFk"), String.valueOf(id));
394 success &= equals("Taxon QualityStatusCache", "Added by Database Management Team", destRS.getString("QualityStatusCache"), id);
395 //TODO TreeIndex
396 success &= isNull("FossilStatusFk", destRS);
397 success &= isNull("FossilStatusCache", destRS);
398 success &= equals("Taxon GUID", srcRS.getString("GUID"), destRS.getString("GUID"), id);
399 success &= equals("Taxon DerivedFromGuid", srcRS.getString("GUID"), destRS.getString("DerivedFromGuid"), id); //according to SQL script GUID and DerivedFromGuid are always the same, according to 2014DB this is even true for all databases
400 success &= isNull("ExpertGUID", destRS); //according to SQL + PESI2014
401 //reimport success &= equals("Taxon ExpertName", srcRS.getString("secTitle"), destRS.getString("ExpertName"), id);
402 success &= isNull("SpeciesExpertGUID", destRS);
403 //reimport success &= equals("Taxon SpeciesExpertName", srcRS.getString("secTitle"), destRS.getString("SpeciesExpertName"), id);
404 //FIXME !! success &= equals("Taxon cache citation", srcRS.getString("secTitle"), destRS.getString("CacheCitation"), id);
405 success &= equals("Taxon Last Action", srcRS.getString("lastAction"), destRS.getString("LastAction"), id);
406 success &= equals("Taxon Last Action Date", srcRS.getTimestamp("lastActionDate"), destRS.getTimestamp("LastActionDate"), id);
407
408 success &= isNull("GUID2", destRS); //only relevant after merge
409 success &= isNull("DerivedFromGuid2", destRS); //only relevant after merge
410 return success;
411 }
412
413 private String makeNomRefString(ResultSet srcRS) throws SQLException {
414 //there is no pure nomRefString field in CDM and also computing is only possible
415 //with cache strategy which requires a running CDM instance. So this is a workaround that maybe needs to be adapted
416 String result = null;
417 String fullTitle = srcRS.getString("nameFullTitleCache");
418 String nameTitleCache = srcRS.getString("nameTitleCache");
419 String nameStatus = CdmUtils.Nz(srcRS.getString("nsTitle"));
420 if (fullTitle != null && nameTitleCache != null){
421 result = fullTitle.substring(nameTitleCache.length())
422 .replaceAll("^, ", "")
423 .replaceAll("(, |^)"+nameStatus+"$", "");
424 }
425 return result;
426 }
427
428 private String mapTaxStatus(String dtype, String taxRelTypeUuidStr) {
429 Integer statusFk = mapTaxStatusFk(dtype, taxRelTypeUuidStr);
430 if (statusFk == null){
431 return null;
432 }else if (statusFk == PesiTransformer.T_STATUS_ACCEPTED){
433 return "accepted";
434 }else if (statusFk == PesiTransformer.T_STATUS_SYNONYM){
435 return "synonym";
436 }else if (statusFk == PesiTransformer.T_STATUS_PRO_PARTE_SYN){
437 return "pro parte synonym";
438 }else if (statusFk == PesiTransformer.T_STATUS_PARTIAL_SYN){
439 return "partial synonym";
440 }
441 return null;
442 }
443
444 private Integer mapTaxStatusFk(String dtype, String taxRelTypeUuidStr) {
445 if (dtype == null){
446 return null;
447 }else if ("Synonym".equals(dtype)){
448 return PesiTransformer.T_STATUS_SYNONYM;
449 }else if ("Taxon".equals(dtype)){
450 UUID relTypeUuid = taxRelTypeUuidStr == null? null: UUID.fromString(taxRelTypeUuidStr);
451 if (TaxonRelationshipType.proParteUuids().contains(relTypeUuid)){
452 return PesiTransformer.T_STATUS_PRO_PARTE_SYN;
453 }else if (TaxonRelationshipType.partialUuids().contains(relTypeUuid)){
454 return PesiTransformer.T_STATUS_PARTIAL_SYN;
455 }else if (TaxonRelationshipType.misappliedNameUuids().contains(relTypeUuid)){
456 return PesiTransformer.T_STATUS_SYNONYM; //no explicit MAN status exists in PESI
457 }else{
458 return PesiTransformer.T_STATUS_ACCEPTED;
459 }
460 }
461 return null;
462 }
463
464 private String normalizeRank(String rankStr) {
465 if (rankStr == null){return null;
466 }else if (rankStr.equals("Convar")){return "Convariety";
467 }else if (rankStr.equals("Unranked (infrageneric)")){return "Tax. infragen.";
468 }else if (rankStr.equals("Unranked (infraspecific)")){return "Tax. infraspec.";
469 }else if (rankStr.equals("Coll. species")){return "Coll. Species";
470 }else if (rankStr.equals("Species Aggregate")){return "Aggregate";
471 }else if (rankStr.equals("Subsection bot.")){return "Subsection";
472 }return rankStr;
473 }
474
475 private String makeDisplayName(ResultSet srcRs) throws SQLException {
476 String nameCache = srcRs.getString("nameCache");
477 String nameTitle = srcRs.getString("nameTitleCache");
478 String taggedName = getTaggedNameTitle(nameCache, nameTitle);
479 String fullNameTitle = srcRs.getString("nameFullTitleCache");
480 String result = fullNameTitle
481 .replace(nameTitle, taggedName);
482 return result;
483 }
484
485 private String getTaggedNameTitle(String nameCache, String nameTitle) {
486 if (nameCache == null){
487 logger.warn("NameCache is null");
488 return nameTitle;
489 }
490 String result = null;
491 try {
492 String[] nameCacheSplit = nameCache.split(" ");
493 String[] nameTitleSplit = nameTitle.split(" ");
494 result = "";
495 boolean currentIsName = false;
496 for (int i=0, j=0; j < nameTitleSplit.length; j++){
497 if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
498 && !isMarker(nameCacheSplit[i])){
499 if(!currentIsName){
500 result += " <i>" + nameCacheSplit[i];
501 currentIsName = true;
502 }else{
503 result += " " + nameCacheSplit[i];
504 }
505 if((j+1)==nameTitleSplit.length){
506 result += "</i>";
507 }
508 i++;
509 }else{
510 if(currentIsName){
511 result += "</i>";
512 currentIsName = false;
513 }
514 result += " " + nameTitleSplit[j];
515 if (i < nameCacheSplit.length && nameCacheSplit[i].equals(nameTitleSplit[j])
516 && isMarker(nameCacheSplit[i])){
517 i++;
518 }
519 }
520 }
521 return result.trim();
522 } catch (Exception e) {
523 e.printStackTrace();
524 return result;
525 }
526 }
527
528 private boolean isMarker(String nameCacheSplit) {
529 return nameCacheSplit.endsWith(".") || nameCacheSplit.equals("[unranked]") ;
530 }
531
532 private boolean testSingleTaxonRelations(int n) throws SQLException {
533 boolean success = true;
534 ResultSet srcRS = source.getResultSet(""
535 + " SELECT t.name_id tid, pt.name_id pid "
536 + " FROM TaxonNode tn "
537 + " INNER JOIN TaxonBase t ON tn.taxon_id = t.id "
538 + " LEFT JOIN TaxonNode ptn ON ptn.id = tn.parent_id "
539 + " LEFT JOIN TaxonBase pt ON ptn.taxon_id = pt.id "
540 + " WHERE t.publish = 1 && pt.publish = 1 "
541 + " ORDER BY CAST(t.name_id as char(20)) ");
542
543 ResultSet destRS = destination.getResultSet("SELECT rel.*, t1.IdInSource t1Id, t2.IdInSource t2Id "
544 + " FROM RelTaxon rel "
545 + " LEFT JOIN Taxon t1 ON t1.TaxonId = rel.TaxonFk1 "
546 + " LEFT JOIN Taxon t2 ON t2.TaxonId = rel.TaxonFk2 "
547 + " WHERE t1."+origEuroMed+" AND t2." + origEuroMed + " AND RelTaxonQualifierFk = 101 "
548 + " ORDER BY t1.IdInSource");
549 int i = 0;
550 while (srcRS.next() && destRS.next()){
551 success &= testSingleTaxonRelation(srcRS, destRS);
552 i++;
553 }
554 success &= equals("Taxon relation count for single compare", n, i, String.valueOf(-1));
555 return success;
556 }
557
558 private boolean testSingleTaxonRelation(ResultSet srcRS, ResultSet destRS) throws SQLException {
559 String id = String.valueOf(srcRS.getInt("tid"));
560 boolean success = equals("Taxon relation taxon1", "NameId: " + srcRS.getInt("tid"), destRS.getString("t1Id"), id);
561 success &= equals("Taxon relation taxon2", "NameId: " + srcRS.getInt("pid"), destRS.getString("t2Id"), id);
562 success &= equals("Taxon relation qualifier fk", PesiTransformer.IS_TAXONOMICALLY_INCLUDED_IN, destRS.getInt("RelTaxonQualifierFk"), id);
563 success &= equals("Taxon relation qualifier cache", "is taxonomically included in", destRS.getString("RelQualifierCache"), id);
564 //TODO enable after next import
565 success &= isNull("notes", destRS);
566 //complete if no further relations need to added
567 return success;
568 }
569
570 private boolean testSingleAdditionalTaxonSources(int n) throws SQLException {
571 boolean success = true;
572 ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, MN.*, s.*, su.sourceuse_name "
573 + " FROM tu_sources MN INNER JOIN tu ON MN.tu_id = tu.id "
574 + " LEFT JOIN sources s ON s.id = MN.source_id "
575 + " LEFT JOIN sourceuses su ON MN.sourceuse_id = su.sourceuse_id "
576 + " ORDER BY CAST(tu.id as char(20)), MN.sourceuse_id, s.id "); //, no.note (not possible because ntext
577 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, ats.*, s.*, su.* "
578 + " FROM AdditionalTaxonSource ats INNER JOIN Taxon t ON t.TaxonId = ats.TaxonFk "
579 + " INNER JOIN Source s ON s.SourceId = ats.SourceFk "
580 + " LEFT JOIN SourceUse su ON su.SourceUseId = ats.SourceUseFk "
581 + " WHERE t."+origEuroMed
582 + " ORDER BY t.IdInSource, su.SourceUseId, s.RefIdInSource ");
583 int count = 0;
584 while (srcRs.next() && destRs.next()){
585 success &= testSingleAdditionalTaxonSource(srcRs, destRs);
586 count++;
587 }
588 success &= equals("Notes count differs", n, count, "-1");
589 return success;
590 }
591
592 private boolean testSingleAdditionalTaxonSource(ResultSet srcRs, ResultSet destRs) throws SQLException {
593 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("sourceuse_name"));
594 boolean success = equals("Additional taxon source taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
595 success &= equals("Additional taxon source fk ", srcRs.getString("source_id"), destRs.getString("RefIdInSource"), id); //currently we use the same id in ERMS and PESI
596 success &= equals("Additional taxon source use fk ", srcRs.getString("sourceuse_id"), destRs.getString("SourceUseFk"), id);
597 success &= equals("Additional taxon source use cache ", srcRs.getString("sourceuse_name"), destRs.getString("SourceUseCache"), id);
598 //TODO some records are still truncated ~ >820 characters
599 success &= equals("Additional taxon source name cache ", srcRs.getString("source_name"), destRs.getString("SourceNameCache"), id);
600 success &= equals("Additional taxon source detail ", srcRs.getString("pagenr"), destRs.getString("SourceDetail"), id);
601 //Complete
602 return success;
603 }
604
605 private boolean testSingleNotes(int n) throws SQLException {
606 boolean success = true;
607 ResultSet srcRs = source.getResultSet("SELECT CAST(tu.id as char(20)) tuId, no.*, l.LanName "
608 + " FROM notes no INNER JOIN tu ON no.tu_id = tu.id "
609 + " LEFT JOIN languages l ON l.LanID = no.lan_id "
610 + " ORDER BY CAST(tu.id as char(20)), no.type, no.noteSortable "); //, no.note (not possible because ntext
611 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, no.*, cat.NoteCategory, l.Language "
612 + " FROM Note no INNER JOIN Taxon t ON t.TaxonId = no.TaxonFk "
613 + " LEFT JOIN NoteCategory cat ON cat.NoteCategoryId = no.NoteCategoryFk "
614 + " LEFT JOIN Language l ON l.LanguageId = no.LanguageFk "
615 + " WHERE t." + origEuroMed
616 + " AND NOT (NoteCategoryFk = 4 AND no.LastAction IS NULL) AND NOT NoteCategoryFk IN (22,23,24) "
617 + " ORDER BY t.IdInSource, no.NoteCategoryCache, Note_1 ");
618 int count = 0;
619 while (srcRs.next() && destRs.next()){
620 success &= testSingleNote(srcRs, destRs);
621 count++;
622 }
623 success &= equals("Notes count differs", n, count, "-1");
624 return success;
625 }
626
627 private boolean testSingleNote(ResultSet srcRs, ResultSet destRs) throws SQLException {
628 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("type"));
629 boolean success = equals("Note taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
630 success &= equals("Note Note_1 ", srcRs.getString("note"), destRs.getString("Note_1"), id);
631 success &= isNull("Note_2", destRs);
632 success &= equals("Note category cache", normalizeNoteCatCache(srcRs.getString("type")), destRs.getString("NoteCategoryCache"), id);
633 success &= equals("Note language ", srcRs.getString("LanName"), destRs.getString("Language"), id);
634 success &= isNull("Region", destRs);
635 success &= isNull("SpeciesExpertGUID", destRs);
636 //SpeciesExpertName, LastAction, LastActionDate handled in separate method
637 //complete
638 return success;
639 }
640
641 private String normalizeNoteCatCache(String string) {
642 return StringUtils.capitalize(string)
643 .replace("Original Combination", "Original combination")
644 .replace("Taxonomic remark", "Taxonomic Remark");
645 }
646
647 private boolean testSingleDistributions(int n) throws SQLException {
648 boolean success = true;
649 ResultSet srcRs = source.getResultSet(
650 " SELECT CAST(tb.name_id as char(20)) AS tid, a.idInVocabulary, a.titleCache area, st.uuid statusUuid, "
651 + " CASE WHEN deb.updated IS NOT NULL THEN deb.updated ELSE deb.created END as lastActionDate, "
652 + " CASE WHEN deb.updated IS NOT NULL THEN 'changed' ELSE 'created' END as lastAction "
653 + " FROM DescriptionElementBase deb INNER JOIN DescriptionBase db ON deb.inDescription_id = db.id "
654 + " LEFT JOIN TaxonBase tb ON db.taxon_id = tb.id "
655 + " LEFT JOIN DefinedTermBase a ON a.id = deb.area_id "
656 + " LEFT JOIN DefinedTermBase st ON st.id = deb.status_id "
657 + distributionCountWhere
658 + " ORDER BY CAST(tb.name_id as char(20)), a.idInVocabulary, a.titleCache ");
659 ResultSet destRs = destination.getResultSet("SELECT t.IdInSource, a.AreaEmCode, oc.*, a.AreaName "
660 + " FROM Occurrence oc INNER JOIN Taxon t ON t.TaxonId = oc.TaxonFk "
661 + " LEFT JOIN Area a ON a.AreaId = oc.AreaFk "
662 + " WHERE t." + origEuroMed
663 + " ORDER BY t.IdInSource, a.AreaEmCode, a.AreaName, oc.Notes ");
664 int count = 0;
665 while (srcRs.next() && destRs.next()){
666 success &= testSingleDistribution(srcRs, destRs);
667 count++;
668 }
669 success &= equals("Distribution count differs", n, count, "-1");
670 return success;
671 }
672
673 private boolean testSingleDistribution(ResultSet srcRs, ResultSet destRs) throws SQLException {
674 String id = String.valueOf(srcRs.getInt("tid") + "-" + srcRs.getString("area"));
675 boolean success = equals("Distribution taxonID ", "NameId: " + String.valueOf(srcRs.getInt("tid")), destRs.getString("IdInSource"), id);
676 success &= equals("Distribution AreaEmCode ", srcRs.getString("idInVocabulary"), destRs.getString("AreaEmCode"), id);
677 // success &= equals("Distribution area name ", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaName"), id);
678 success &= equals("Distribution area name cache", normalizeDistrArea(srcRs.getString("area")), destRs.getString("AreaNameCache"), id);
679 success &= equals("Distribution OccurrenceStatusFk", mapStatus(srcRs.getString("statusUuid")), destRs.getInt("OccurrenceStatusFk"), id);
680 //TODO success &= equals("Distribution OccurrenceStatusCache", "Present", destRs.getString("OccurrenceStatusCache"), id);
681 success &= isNull("SourceFk", destRs); //sources should be moved to extra table only, according to script there were values, but in PESI 2014 values existed only in OccurrenceSource table (for all only E+M records)
682 success &= isNull("SourceCache", destRs); //sources should be moved to extra table, see above
683 //TODO success &= equals("Distribution notes ", srcRs.getString("note"), destRs.getString("Notes"), id);
684 success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in EM and according to script
685 success &= isNull("SpeciesExpertName", destRs); //SpeciesExpertName does not exist in EM and according to script
686 success &= equals("Distribution Last Action", srcRs.getString("lastAction"), destRs.getString("LastAction"), id);
687 success &= equals("Distribution Last Action Date", srcRs.getTimestamp("lastActionDate"), destRs.getTimestamp("LastActionDate"), id);
688 return success;
689 }
690
691 /**
692 * @param string
693 * @return
694 */
695 private Integer mapStatus(String uuidStr) {
696 UUID uuid = UUID.fromString(uuidStr);
697 if (uuid.equals(PresenceAbsenceTerm.uuidNativeError) ){ //native, reported in error
698 return PesiTransformer.STATUS_ABSENT;
699 }else if (uuid.equals(PresenceAbsenceTerm.uuidIntroducesAdventitious) //casual, introduced adventitious
700 || uuid.equals(PresenceAbsenceTerm.uuidIntroducedUncertainDegreeNaturalisation)//introduced: uncertain degree of naturalisation
701 || uuid.equals(PresenceAbsenceTerm.uuidIntroduced)){
702 return PesiTransformer.STATUS_INTRODUCED;
703 }else if (uuid.equals(PresenceAbsenceTerm.uuidNative) ){ //native
704 return PesiTransformer.STATUS_NATIVE;
705 }else if (uuid.equals(PresenceAbsenceTerm.uuidNaturalised) ){ //naturalised
706 return PesiTransformer.STATUS_NATURALISED;
707 }else if (uuid.equals(PresenceAbsenceTerm.uuidNativePresenceQuestionable) ){ //native, presence questionable
708 return PesiTransformer.STATUS_DOUBTFUL;
709 }else if (uuid.equals(PresenceAbsenceTerm.uuidCultivated) ){ //cultivated
710 return PesiTransformer.STATUS_MANAGED;
711 }else if (uuid.equals(BerlinModelTransformer.uuidStatusUndefined) ){ //native, reported in error
712 return -1;
713 }
714
715 return null;
716 }
717
718 private String normalizeDistrArea(String area) {
719 if (area == null){
720 return null;
721 }else if ("France".equals(area)){return "French mainland";
722 }else if ("France, with Channel Islands and Monaco".equals(area)){return "France";
723 }else if ("Greece".equals(area)){return "Greece with Cyclades and more islands";
724 }else if ("Spain, with Gibraltar and Andorra (without Bl and Ca)".equals(area)){return "Spain";
725 }else if ("Italy, with San Marino and Vatican City (without Sa and Si(S))".equals(area)){return "Italy";
726 }else if ("Morocco, with Spanish territories".equals(area)){return "Morocco";
727 }else if ("Serbia including Kosovo and Vojvodina".equals(area)){return "Serbia including Vojvodina and with Kosovo";
728 }else if ("Caucasia (Ab + Ar + Gg + Rf(CS))".equals(area)){return "Caucasus region";
729 }else if ("Georgia, with Abchasia and Adzharia".equals(area)){return "Georgia";
730 }else if ("Canary Is.".equals(area)){return "Canary Islands";
731 }else if ("Kriti with Karpathos, Kasos & Gavdhos".equals(area)){return "Crete with Karpathos, Kasos & Gavdhos";
732 }else if ("Ireland, with N Ireland".equals(area)){return "Ireland";
733 }else if ("mainland Spain".equals(area)){return "Kingdom of Spain";
734 }else if ("Portugal".equals(area)){return "Portuguese mainland";
735 }else if ("Svalbard".equals(area)){return "Svalbard with Björnöya and Jan Mayen";
736 }else if ("Norway".equals(area)){return "Norwegian mainland";
737 }else if ("Ukraine".equals(area)){return "Ukraine including Crimea";
738 }else if ("Turkey-in-Europe".equals(area)){return "European Turkey";
739 }else if ("Azerbaijan".equals(area)){return "Azerbaijan including Nakhichevan";
740 }else if ("Ireland".equals(area)){return "Republic of Ireland";
741 }else if ("France".equals(area)){return "French mainland";
742 }
743 return area;
744 }
745
746 private boolean testSingleCommonNames(int n) throws SQLException {
747 boolean success = true;
748 ResultSet srcRs = source.getResultSet("SELECT v.*, ISNULL([639_3],[639_2]) iso, l.LanName, tu.id tuId "
749 + " FROM vernaculars v LEFT JOIN tu ON v.tu_id = tu.id LEFT JOIN languages l ON l.LanID = v.lan_id "
750 + " ORDER BY CAST(tu.id as char(20)), ISNULL([639_3],[639_2]), v.vername, v.id ");
751 ResultSet destRs = destination.getResultSet("SELECT cn.*, t.IdInSource, l.ISO639_2, l.ISO639_3 "
752 + " FROM CommonName cn INNER JOIN Taxon t ON t.TaxonId = cn.TaxonFk LEFT JOIN Language l ON l.LanguageId = cn.LanguageFk "
753 + " WHERE t." + origEuroMed
754 + " ORDER BY t.IdInSource, ISNULL("+preferredISO639+", "+alternativeISO639+"), cn.CommonName, cn.LastActionDate "); //sorting also lastActionDate results in a minimum of exact duplicate problems
755 int count = 0;
756 while (srcRs.next() && destRs.next()){
757 success &= testSingleCommonName(srcRs, destRs);
758 count++;
759 }
760 success &= equals("Common name count differs", n, count, "-1");
761 return success;
762 }
763
764 boolean prefer639_3 = true;
765 String preferredISO639 = prefer639_3? "ISO639_3":"ISO639_2";
766 String alternativeISO639 = prefer639_3? "ISO639_2":"ISO639_3";
767
768 private boolean testSingleCommonName(ResultSet srcRs, ResultSet destRs) throws SQLException {
769 String id = String.valueOf(srcRs.getInt("tuId") + "-" + srcRs.getString("lan_id"));
770 boolean success = equals("Common name taxonID ", "tu_id: " + String.valueOf(srcRs.getInt("tuId")), destRs.getString("IdInSource"), id);
771 success &= equals("CommonName name ", srcRs.getString("vername"), destRs.getString("CommonName"), id);
772 success &= equals("Common name languageFk ", srcRs.getString("iso"), getLanguageIso(destRs), id);
773 success = equals("CommonName LanguageCache ", normalizeLang(srcRs.getString("LanName")), destRs.getString("LanguageCache"), id);
774 //TODO needed? success = equals("CommonName language code ", srcRs.getString("lan_id"), destRs.getString("LanguageFk"), id);
775 success &= isNull("Region", destRs); //region does not seem to exist in ERMS
776 //TODO see comments
777 // success &= isNull("SourceFk", destRs); //sources should be moved to extra table, check with PESI 2014
778 // success &= isNull("SourceNameCache", destRs); //sources should be moved to extra table, check with PESI 2014
779 success &= isNull("SpeciesExpertGUID", destRs); //SpeciesExpertGUID does not exist in ERMS
780 //SpeciesExpertName,LastAction,LastActionDate handled in separate method
781 //complete
782 return success;
783 }
784
785 private String normalizeLang(String string) {
786 if ("Spanish".equals(string)){
787 return "Spanish, Castillian";
788 }else if ("Modern Greek (1453-)".equals(string)){
789 return "Greek";
790 }else if ("Malay (individual language)".equals(string)){
791 return "Malay";
792 }else if ("Swahili (individual language)".equals(string)){
793 return "Swahili";
794 }
795
796 return string;
797 }
798
799 private String getLanguageIso(ResultSet destRs) throws SQLException {
800 String result = destRs.getString(preferredISO639);
801 if (result == null){
802 result = destRs.getString(alternativeISO639);
803 }
804 return result;
805 }
806
807 private boolean testSingleReferences(int count) throws SQLException {
808 boolean success = true;
809 ResultSet srcRS = source.getResultSet("SELECT r.*, a.titleCache author "
810 + " FROM Reference r LEFT OUTER JOIN AgentBase a ON r.authorship_id = a.id "
811 + " ORDER BY r.id ");
812 ResultSet destRS = destination.getResultSet("SELECT s.* FROM Source s "
813 + " WHERE s." + origEuroMed
814 + " ORDER BY s.RefIdInSource "); // +1 for the source reference "erms" but this has no OriginalDB
815 int i = 0;
816 while (srcRS.next() && destRS.next()){
817 success &= testSingleReference(srcRS, destRS);
818 i++;
819 }
820 success &= equals("References count differs", count, i, "-1");
821 return success;
822 }
823
824 private boolean testSingleReference(ResultSet srcRS, ResultSet destRS) throws SQLException {
825 String id = String.valueOf(srcRS.getInt("id"));
826 boolean success = equals("Reference ID ", srcRS.getInt("id"), destRS.getInt("RefIdInSource"), id);
827 success &= isNull("IMIS_Id", destRS); //for E+M no IMIS id exists
828 success &= equals("Reference SourceCategoryFk ", convertSourceTypeFk(srcRS.getString("refType")), destRS.getInt("SourceCategoryFk"), id);
829 success &= equals("Reference SourceCategoryCache ", convertSourceTypeCache(srcRS.getString("refType")), destRS.getString("SourceCategoryCache"), id);
830 success &= equals("Reference name ", srcRS.getString("titleCache"), destRS.getString("Name"), id);
831 success &= equals("Reference abstract ", srcRS.getString("referenceAbstract"), destRS.getString("Abstract"), id);
832 success &= equals("Reference title ", srcRS.getString("title"), destRS.getString("Title"), id);
833 success &= equals("Reference author string ", srcRS.getString("author"), destRS.getString("AuthorString"), id);
834 //TODO
835 success &= equals("Reference year ", normalizeYear(srcRS), destRS.getString("RefYear"), id);
836 //FIXME
837 // success &= equals("Reference NomRefCache ", srcRS.getString("abbrevTitleCache"), destRS.getString("NomRefCache"), id);
838 success &= equals("Reference DOI ", srcRS.getString("doi"), destRS.getString("Doi"), id);
839 success &= equals("Reference link ", srcRS.getString("uri"), destRS.getString("Link"), id);
840 //TODO Notes
841 // success &= equals("Reference note ", srcRS.getString("source_note"), destRS.getString("Notes"), id);
842 //complete
843 return success;
844 }
845
846 private Integer convertSourceTypeFk(String sourceType) {
847 if (sourceType == null){
848 return null;
849 }else if ("DB".equals(sourceType)){
850 return PesiTransformer.REF_DATABASE;
851 }else if ("JOU".equals(sourceType)){
852 return PesiTransformer.REF_JOURNAL;
853 }else if ("BK".equals(sourceType)){
854 return PesiTransformer.REF_BOOK;
855 }else if ("GEN".equals(sourceType)){
856 return PesiTransformer.REF_UNRESOLVED;
857 }else if ("SER".equals(sourceType)){
858 // TODO correct?
859 return PesiTransformer.REF_UNRESOLVED;
860 }else if ("i".equals(sourceType)){
861 return 12;
862 }
863 return null;
864 }
865 private String convertSourceTypeCache(String sourceType) {
866 if (sourceType == null){
867 return null;
868 }else if ("DB".equals(sourceType)){
869 return "database";
870 }else if ("JOU".equals(sourceType)){
871 return "journal";
872 }else if ("BK".equals(sourceType)){
873 return "book";
874 }else if ("SER".equals(sourceType)){
875 return "published";
876 }else if ("BK".equals(sourceType)){
877 return "book";
878 }else if ("GEN".equals(sourceType)){
879 return "unresolved";
880 }else if ("i".equals(sourceType)){
881 //TODO
882 return "i";
883 }
884 return null;
885 }
886
887 private boolean testReferenceCount() {
888 int countSrc = source.getUniqueInteger(countReferencesStr);
889 int countDest = destination.getUniqueInteger("SELECT count(*) FROM Source s WHERE s."+ origEuroMed); // +1 for the source reference "erms" but this has no OriginalDB
890 boolean success = equals("Reference count ", countSrc, countDest, "-1");
891 return success;
892 }
893
894 private String normalizeYear(ResultSet rs) throws SQLException {
895 String freetext = rs.getString("datePublished_freetext");
896 if(StringUtils.isNotBlank(freetext)){
897 return freetext;
898 }
899 String start = rs.getString("datePublished_start");
900 String end = rs.getString("datePublished_end");
901 if (start != null){
902 start = start.substring(0,4);
903 }
904 if (end != null){
905 end = end.substring(0,4);
906 }
907 String result = start == null? null: start + (end==null? "": "-"+ end);
908 return result;
909 }
910
911 private boolean isNull(String attrName, ResultSet destRS) throws SQLException {
912 Object value = destRS.getObject(attrName);
913 if (value != null){
914 String message = attrName + " was expected to be null but was: " + value.toString();
915 logger.warn(message);
916 return false;
917 }else{
918 logger.info(attrName + " was null as expected");
919 return true;
920 }
921 }
922
923 private boolean equals(String messageStart, Timestamp srcDate, Timestamp destDate, String id) {
924 if (!CdmUtils.nullSafeEqual(srcDate, destDate)){
925 LocalDate date1 = srcDate.toLocalDateTime().toLocalDate();
926 LocalDate date2 = destDate.toLocalDateTime().toLocalDate();
927 if (date1.equals(date2) || date1.plusDays(1).equals(date2)){
928 logger.info(messageStart + " were (almost) equal: " + srcDate);
929 return true;
930 }else{
931 String message = id + ": " + messageStart + " must be equal, but was not.\n Source: "+ srcDate + "; Destination: " + destDate;
932 logger.warn(message);
933 return false;
934 }
935 }else{
936 logger.info(messageStart + " were equal: " + srcDate);
937 return true;
938 }
939 }
940
941 private boolean equals(String messageStart, Integer nSrc, Integer nDest, String id) {
942 String strId = id.equals("-1")? "": (id+ ": ");
943 if (!CdmUtils.nullSafeEqual(nSrc,nDest)){
944 String message = strId+ messageStart + " must be equal, but was not.\n Source: "+ nSrc + "; Destination: " + nDest;
945 logger.warn(message);
946 return false;
947 }else{
948 logger.info(strId + messageStart + " were equal: " + nSrc);
949 return true;
950 }
951 }
952
953 private boolean equals(String messageStart, String strSrc, String strDest, String id) {
954 if (StringUtils.isBlank(strSrc)){
955 strSrc = null;
956 }else{
957 strSrc = strSrc.trim();
958 }
959 //we do not trim strDest here because this should be done during import already. If not it should be shown here
960 if (!CdmUtils.nullSafeEqual(strSrc, strDest)){
961 int index = CdmUtils.diffIndex(strSrc, strDest);
962 String message = id+ ": " + messageStart + " must be equal, but was not at "+index+".\n Source: "+ strSrc + "\n Destination: " + strDest;
963 logger.warn(message);
964 return false;
965 }else{
966 logger.info(id+ ": " + messageStart + " were equal: " + strSrc);
967 return true;
968 }
969 }
970
971 protected Integer nullSafeInt(ResultSet rs, String columnName) throws SQLException {
972 Object intObject = rs.getObject(columnName);
973 if (intObject == null){
974 return null;
975 }else{
976 return Integer.valueOf(intObject.toString());
977 }
978 }
979
980 //** ************* MAIN ********************************************/
981
982
983
984 public static void main(String[] args){
985 PesiEuroMedValidator validator = new PesiEuroMedValidator();
986 validator.invoke(new Source(defaultSource), defaultDestination);
987 System.exit(0);
988 }
989 }