Revision 17d546a5
Added by Katja Luther over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/FaunaEuErmsFindIdenticalNamesActivator.java | ||
---|---|---|
3 | 3 |
import java.io.FileWriter; |
4 | 4 |
import java.io.IOException; |
5 | 5 |
import java.util.ArrayList; |
6 |
import java.util.HashSet; |
|
7 | 6 |
import java.util.Iterator; |
8 | 7 |
import java.util.List; |
9 | 8 |
import java.util.Map; |
10 | 9 |
import java.util.Set; |
11 | 10 |
import java.util.UUID; |
12 | 11 |
|
13 |
import com.sun.media.jfxmedia.logging.Logger;
|
|
12 |
import org.apache.log4j.Logger;
|
|
14 | 13 |
|
15 | 14 |
import eu.etaxonomy.cdm.api.application.CdmApplicationController; |
16 | 15 |
import eu.etaxonomy.cdm.app.common.CdmDestinations; |
17 |
import eu.etaxonomy.cdm.app.util.TestDatabase;
|
|
16 |
import eu.etaxonomy.cdm.app.pesi.ErmsImportActivator;
|
|
18 | 17 |
import eu.etaxonomy.cdm.database.DbSchemaValidation; |
19 | 18 |
import eu.etaxonomy.cdm.database.ICdmDataSource; |
20 | 19 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
... | ... | |
27 | 26 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
28 | 27 |
import eu.etaxonomy.cdm.model.reference.Reference; |
29 | 28 |
import eu.etaxonomy.cdm.model.taxon.Classification; |
29 |
import eu.etaxonomy.cdm.model.taxon.Synonym; |
|
30 | 30 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
31 | 31 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
32 | 32 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode; |
33 | 33 |
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto; |
34 | 34 |
|
35 | 35 |
public class FaunaEuErmsFindIdenticalNamesActivator { |
36 |
|
|
36 |
private static final Logger logger = Logger.getLogger(FaunaEuErmsFindIdenticalNamesActivator.class); |
|
37 | 37 |
//static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2(); |
38 | 38 |
static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.cdm_test_local_faunaEu_mysql(); |
39 | 39 |
static Reference faunaSec; |
... | ... | |
83 | 83 |
faunaSec = appCtrFaunaEu.getReferenceService().load(UUID.fromString("6786d863-75d4-4796-b916-c1c3dff4cb70")); |
84 | 84 |
ermsSec = appCtrFaunaEu.getReferenceService().load(UUID.fromString("7744bc26-f914-42c4-b54a-dd2a030a8bb7")); |
85 | 85 |
Map<String, List<TaxonName>> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(ermsSec, faunaSec, propertyPaths); |
86 |
//List<UUID> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths); |
|
87 |
|
|
88 |
System.err.println("first name: " + namesOfIdenticalTaxa.get(0) + " " + namesOfIdenticalTaxa.size()); |
|
89 |
//TaxonName zooName = namesOfIdenticalTaxa.get(0); |
|
90 |
//System.err.println(zooName + " nr of taxa " + namesOfIdenticalTaxa.size()); |
|
91 |
//TaxonNameComparator taxComp = new TaxonNameComparator(); |
|
92 |
|
|
93 |
//Collections.sort(namesOfIdenticalTaxa,taxComp); |
|
94 |
System.err.println(namesOfIdenticalTaxa.get(0) + " - " + namesOfIdenticalTaxa.get(1) + " - " + namesOfIdenticalTaxa.get(2)); |
|
86 |
|
|
95 | 87 |
List<FaunaEuErmsMerging> mergingObjects = new ArrayList<>(); |
96 | 88 |
FaunaEuErmsMerging mergeObject; |
97 | 89 |
TaxonName faunaEuTaxName; |
98 | 90 |
TaxonName ermsTaxName; |
99 |
|
|
91 |
System.err.println("Start creating merging objects"); |
|
100 | 92 |
mergingObjects= sc.createMergeObjects(namesOfIdenticalTaxa, appCtrFaunaEu); |
93 |
boolean resultOK = true; |
|
94 |
System.err.println("Start creating csv files"); |
|
95 |
resultOK = resultOK && sc.writeSameNamesdifferentAuthorToCsv(mergingObjects, sFileName + "_authors.csv"); |
|
96 |
resultOK = resultOK &&sc.writeSameNamesdifferentStatusToCsv(mergingObjects, sFileName + "_status.csv"); |
|
97 |
resultOK = resultOK &&sc.writeSameNamesToCsVFile(mergingObjects, sFileName + "_names.csv"); |
|
98 |
resultOK = resultOK &&sc.writeSameNamesdifferentPhylumToCsv(mergingObjects, sFileName + "_phylum.csv"); |
|
99 |
resultOK = resultOK &&sc.writeSameNamesDifferentParentToCsv(mergingObjects, sFileName + "parent.csv"); |
|
101 | 100 |
|
102 |
sc.writeSameNamesdifferentAuthorToCsv(mergingObjects, sFileName + "_authors.csv"); |
|
103 |
sc.writeSameNamesdifferentStatusToCsv(mergingObjects, sFileName + "_status.csv"); |
|
104 |
sc.writeSameNamesToCsVFile(mergingObjects, sFileName + "_names.csv"); |
|
105 |
sc.writeSameNamesdifferentPhylumToCsv(mergingObjects, sFileName + "_phylum.csv"); |
|
106 |
|
|
107 |
|
|
108 |
System.out.println("End merging Fauna Europaea and Erms"); |
|
101 |
System.err.println("End merging Fauna Europaea and Erms" + resultOK); |
|
102 |
System.exit(0); |
|
109 | 103 |
|
110 | 104 |
} |
111 | 105 |
|
... | ... | |
157 | 151 |
} |
158 | 152 |
return true; |
159 | 153 |
} |
154 |
|
|
155 |
private boolean writeSameNamesDifferentParentToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){ |
|
156 |
try |
|
157 |
{ |
|
158 |
FileWriter writer = new FileWriter(sfileName); |
|
159 |
|
|
160 |
//create Header |
|
161 |
String firstLine = "same names but different parent"; |
|
162 |
createHeader(writer, firstLine); |
|
163 |
|
|
164 |
//write data |
|
165 |
for (FaunaEuErmsMerging merging : mergingObjects){ |
|
166 |
//TODO |
|
167 |
if ((merging.getParentStringInErms()== null )^ (merging.getParentStringInFaunaEu()== null)){ |
|
168 |
writeCsvLine(writer, merging) ; |
|
169 |
}else if(!((merging.getParentStringInErms()==null) && (merging.getParentStringInFaunaEu()==null))){ |
|
170 |
if(!merging.getParentStringInErms().equals(merging.getParentStringInFaunaEu())){ |
|
171 |
writeCsvLine(writer, merging) ; |
|
172 |
} |
|
173 |
} |
|
174 |
} |
|
175 |
writer.flush(); |
|
176 |
writer.close(); |
|
177 |
} |
|
178 |
catch(IOException e) |
|
179 |
{ |
|
180 |
return false; |
|
181 |
} |
|
182 |
return true; |
|
183 |
} |
|
160 | 184 |
|
161 | 185 |
private boolean writeSameNamesdifferentRankToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){ |
162 | 186 |
try |
... | ... | |
264 | 288 |
//write data |
265 | 289 |
for (FaunaEuErmsMerging merging : mergingObjects){ |
266 | 290 |
|
267 |
if (!merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){ |
|
291 |
if (merging.getAuthorInErms() != null && merging.getAuthorInFaunaEu() != null && !merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){
|
|
268 | 292 |
writeCsvLine(writer, merging); |
293 |
}else if ((merging.getAuthorInErms() == null && merging.getAuthorInFaunaEu() != null) || (merging.getAuthorInErms() != null && merging.getAuthorInFaunaEu() == null)) { |
|
294 |
writeCsvLine(writer, merging); |
|
269 | 295 |
} |
270 | 296 |
} |
271 | 297 |
|
... | ... | |
298 | 324 |
writer.append("synonym"); |
299 | 325 |
} |
300 | 326 |
writer.append(';'); |
301 |
writer.append(merging.getPhylumInFaunaEu().getTaxonTitleCache());
|
|
327 |
writer.append(merging.getPhylumInFaunaEu() != null? merging.getPhylumInFaunaEu().getTaxonTitleCache(): "");
|
|
302 | 328 |
writer.append(';'); |
303 | 329 |
writer.append(merging.getParentStringInFaunaEu()); |
304 | 330 |
writer.append(';'); |
... | ... | |
322 | 348 |
} |
323 | 349 |
|
324 | 350 |
writer.append(';'); |
325 |
writer.append(merging.getPhylumInErms().getTaxonTitleCache());
|
|
351 |
writer.append(merging.getPhylumInErms() != null? merging.getPhylumInErms().getTaxonTitleCache():"");
|
|
326 | 352 |
writer.append(';'); |
327 | 353 |
writer.append(merging.getParentStringInErms()); |
328 | 354 |
writer.append(';'); |
... | ... | |
332 | 358 |
|
333 | 359 |
|
334 | 360 |
private List<FaunaEuErmsMerging> createMergeObjects(Map<String,List<TaxonName>> names, CdmApplicationController appCtr){ |
335 |
|
|
361 |
Classification faunaEuClassification = appCtr.getClassificationService().load(UUID.fromString("44d8605e-a7ce-41e1-bee9-99edfec01e7c")); |
|
362 |
Classification ermsClassification = appCtr.getClassificationService().load(UUID.fromString("6fa988a9-10b7-48b0-a370-2586fbc066eb")); |
|
336 | 363 |
List<FaunaEuErmsMerging> merge = new ArrayList<>(); |
337 |
TaxonName zooName, zooName2; |
|
364 |
|
|
338 | 365 |
FaunaEuErmsMerging mergeObject; |
339 | 366 |
String idInSource1; |
340 | 367 |
List<TaxonName> identicalNames; |
368 |
|
|
341 | 369 |
for (String nameCache: names.keySet()){ |
342 | 370 |
identicalNames = names.get(nameCache); |
343 | 371 |
|
344 | 372 |
mergeObject = new FaunaEuErmsMerging(); |
345 |
//TODO:überprüfen, ob die beiden Namen identisch sind und aus unterschiedlichen DB kommen |
|
346 |
Classification faunaEuClassification = appCtr.getClassificationService().load(UUID.fromString("44d8605e-a7ce-41e1-bee9-99edfec01e7c")); |
|
347 |
Classification ermsClassification = appCtr.getClassificationService().load(UUID.fromString("6fa988a9-10b7-48b0-a370-2586fbc066eb")); |
|
373 |
|
|
374 |
|
|
348 | 375 |
//getPhylum |
349 | 376 |
TaxonNodeDto phylum1 = null; |
350 | 377 |
TaxonName faunaEuName = null; |
... | ... | |
366 | 393 |
//TODO: find the two correct names |
367 | 394 |
} |
368 | 395 |
}else { |
369 |
System.err.println(nameCache + " has more than two identical namecaches");
|
|
370 |
return null;
|
|
396 |
logger.debug(nameCache + " has more than two identical namecaches");
|
|
397 |
continue;
|
|
371 | 398 |
} |
372 | 399 |
phylum1 = null; |
373 |
if (faunaEuName != null && !faunaEuName.getRank().isHigher(Rank.PHYLUM())){ |
|
400 |
if (faunaEuName.getRank().equals(Rank.PHYLUM())) { |
|
401 |
Taxon taxon = null; |
|
402 |
taxon = getAcceptedTaxon(faunaEuName); |
|
403 |
if (taxon != null) { |
|
404 |
phylum1 = new TaxonNodeDto(taxon.getTaxonNode(faunaEuClassification)); |
|
405 |
} |
|
406 |
|
|
407 |
} |
|
408 |
if (phylum1 == null && faunaEuName != null && !faunaEuName.getRank().isHigher(Rank.PHYLUM())){ |
|
374 | 409 |
phylum1 =appCtr.getTaxonNodeService().taxonNodeDtoParentRank(faunaEuClassification, Rank.PHYLUM(), faunaEuName); |
375 | 410 |
} |
376 | 411 |
|
377 | 412 |
TaxonNodeDto phylum2 = null; |
378 |
if (ermsName != null && !ermsName.getRank().isHigher(Rank.PHYLUM())){ |
|
413 |
if (ermsName.getRank().equals(Rank.PHYLUM())) { |
|
414 |
Taxon taxon = null; |
|
415 |
taxon = getAcceptedTaxon(ermsName); |
|
416 |
if (taxon != null) { |
|
417 |
phylum2 = new TaxonNodeDto(taxon.getTaxonNode(ermsClassification)); |
|
418 |
} |
|
419 |
|
|
420 |
} |
|
421 |
if (phylum2 == null && ermsName != null && !ermsName.getRank().isHigher(Rank.PHYLUM())){ |
|
379 | 422 |
phylum2 = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(ermsClassification, Rank.PHYLUM(), ermsName); |
380 | 423 |
} |
381 | 424 |
mergeObject.setPhylumInErms(phylum1); |
... | ... | |
429 | 472 |
String parentNameCache = parentName.getNameCache(); |
430 | 473 |
mergeObject.setParentStringInErms(parentNameCache); |
431 | 474 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
432 |
//System.err.println("parentName: " + parentNameCache); |
|
475 |
|
|
433 | 476 |
} |
434 | 477 |
}else{ |
435 | 478 |
mergeObject.setStatInErms(false); |
479 |
TaxonNode parentNode = getAcceptedNode(ermsName); |
|
480 |
//TODO: ändern mit erweitertem Initializer.. |
|
481 |
if (parentNode != null){ |
|
482 |
TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName()); |
|
483 |
String parentNameCache = parentName.getNameCache(); |
|
484 |
mergeObject.setParentStringInErms(parentNameCache); |
|
485 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
|
486 |
|
|
487 |
} |
|
436 | 488 |
} |
437 | 489 |
taxa = faunaEuName.getTaxa(); |
438 | 490 |
if (!taxa.isEmpty()){ |
... | ... | |
462 | 514 |
String parentNameCache = parentName.getNameCache(); |
463 | 515 |
mergeObject.setParentStringInFaunaEu(parentNameCache); |
464 | 516 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
465 |
System.err.println("parentName: " + parentNameCache); |
|
517 |
|
|
466 | 518 |
}else{ |
467 |
System.err.println("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid());
|
|
519 |
logger.debug("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid());
|
|
468 | 520 |
} |
469 | 521 |
|
470 | 522 |
} |
471 |
}else{ |
|
472 |
mergeObject.setStatInErms(false); |
|
473 |
} |
|
474 |
taxa = faunaEuName.getTaxa(); |
|
475 |
if (!taxa.isEmpty()){ |
|
476 |
mergeObject.setStatInFaunaEu(true); |
|
477 | 523 |
}else{ |
478 | 524 |
mergeObject.setStatInFaunaEu(false); |
479 |
|
|
525 |
TaxonNode parentNode = getAcceptedNode(faunaEuName); |
|
526 |
//TODO: ändern mit erweitertem Initializer.. |
|
527 |
if (parentNode != null){ |
|
528 |
TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName()); |
|
529 |
String parentNameCache = parentName.getNameCache(); |
|
530 |
mergeObject.setParentStringInFaunaEu(parentNameCache); |
|
531 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
|
532 |
|
|
533 |
} |
|
480 | 534 |
} |
535 |
|
|
481 | 536 |
|
482 | 537 |
mergeObject.setRankInErms(ermsName.getRank().getLabel()); |
483 | 538 |
mergeObject.setRankInFaunaEu(faunaEuName.getRank().getLabel()); |
... | ... | |
513 | 568 |
return merge; |
514 | 569 |
|
515 | 570 |
} |
571 |
|
|
572 |
|
|
573 |
/** |
|
574 |
* @param ermsName |
|
575 |
* @return |
|
576 |
*/ |
|
577 |
private TaxonNode getAcceptedNode(TaxonName ermsName) { |
|
578 |
Set<TaxonBase> taxonBases = ermsName.getTaxonBases(); |
|
579 |
Taxon taxon = null; |
|
580 |
if (taxonBases != null && !taxonBases.isEmpty()) { |
|
581 |
TaxonBase taxonBase = taxonBases.iterator().next(); |
|
582 |
if (taxonBase instanceof Synonym) { |
|
583 |
taxon = ((Synonym)taxonBase).getAcceptedTaxon(); |
|
584 |
} |
|
585 |
} |
|
586 |
|
|
587 |
|
|
588 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
589 |
|
|
590 |
TaxonNode node, parentNode = null; |
|
591 |
if (nodes != null && !nodes.isEmpty()) { |
|
592 |
parentNode = nodes.iterator().next(); |
|
593 |
} |
|
594 |
return parentNode; |
|
595 |
} |
|
596 |
|
|
597 |
|
|
598 |
/** |
|
599 |
* @param ermsName |
|
600 |
* @param taxon |
|
601 |
* @return |
|
602 |
*/ |
|
603 |
private Taxon getAcceptedTaxon(TaxonName ermsName) { |
|
604 |
Taxon taxon = null; |
|
605 |
if (ermsName.getTaxa() != null && !ermsName.getTaxa().isEmpty()){ |
|
606 |
taxon = ermsName.getTaxa().iterator().next(); |
|
607 |
|
|
608 |
}else if (ermsName.getTaxonBases() != null && !ermsName.getTaxonBases().isEmpty()){ |
|
609 |
TaxonBase taxonBase = ermsName.getTaxonBases().iterator().next(); |
|
610 |
if (taxonBase instanceof Synonym) { |
|
611 |
Synonym syn = (Synonym)taxonBase; |
|
612 |
taxon = syn.getAcceptedTaxon(); |
|
613 |
} |
|
614 |
} |
|
615 |
return taxon; |
|
616 |
} |
|
516 | 617 |
} |
Also available in: Unified diff
ref #1445: smaller changes in activators