Revision 3f131c46
Added by Andreas Müller about 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiFindIdenticalNamesActivator.java | ||
---|---|---|
11 | 11 |
import java.util.ArrayList; |
12 | 12 |
import java.util.Arrays; |
13 | 13 |
import java.util.HashMap; |
14 |
import java.util.HashSet; |
|
14 | 15 |
import java.util.Iterator; |
15 | 16 |
import java.util.List; |
16 | 17 |
import java.util.Map; |
... | ... | |
18 | 19 |
import java.util.UUID; |
19 | 20 |
|
20 | 21 |
import org.apache.log4j.Logger; |
22 |
import org.springframework.transaction.TransactionStatus; |
|
21 | 23 |
|
22 | 24 |
import eu.etaxonomy.cdm.api.application.CdmApplicationController; |
23 | 25 |
import eu.etaxonomy.cdm.app.common.CdmDestinations; |
... | ... | |
36 | 38 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
37 | 39 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
38 | 40 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode; |
41 |
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship; |
|
42 |
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType; |
|
39 | 43 |
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto; |
40 | 44 |
|
41 | 45 |
public class PesiFindIdenticalNamesActivator { |
... | ... | |
85 | 89 |
System.out.println("Start getIdenticalNames..."); |
86 | 90 |
|
87 | 91 |
Map<String, Map<UUID, Set<TaxonName>>> namesOfIdenticalTaxa; |
92 |
TransactionStatus tx = app.startTransaction(true); |
|
88 | 93 |
try { |
89 | 94 |
namesOfIdenticalTaxa = app.getTaxonService().findIdenticalTaxonNames(sourceRefUuids, propertyPaths); |
90 | 95 |
} catch (Exception e) { |
... | ... | |
93 | 98 |
} |
94 | 99 |
System.out.println("Start creating merging objects"); |
95 | 100 |
List<Map<UUID, PesiMergeObject>> mergingObjects = createMergeObjects(namesOfIdenticalTaxa, app); |
101 |
app.commitTransaction(tx); |
|
102 |
|
|
96 | 103 |
boolean resultOK = true; |
97 | 104 |
System.out.println("Start creating csv files"); |
98 | 105 |
resultOK &= writeSameNamesDifferentAuthorToCsv(mergingObjects, sources, path + "_authors.csv"); |
... | ... | |
180 | 187 |
} |
181 | 188 |
} |
182 | 189 |
|
183 |
private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method) throws IllegalAccessException, IllegalArgumentException, InvocationTargetException { |
|
190 |
private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method) |
|
191 |
throws IllegalAccessException, IllegalArgumentException, InvocationTargetException { |
|
184 | 192 |
|
185 | 193 |
if (method == null){ |
186 | 194 |
return true; |
... | ... | |
218 | 226 |
writer.append(';'); |
219 | 227 |
writer.append("rank"+i); |
220 | 228 |
writer.append(';'); |
221 |
writer.append("state"+i);
|
|
229 |
writer.append("status"+i);
|
|
222 | 230 |
writer.append(';'); |
223 | 231 |
writer.append("phylum"+i); |
224 | 232 |
writer.append(';'); |
... | ... | |
237 | 245 |
if(merging == null){ |
238 | 246 |
continue; |
239 | 247 |
} |
240 |
writer.append(sources.get(uuid)).append(";");
|
|
241 |
writer.append(merging.getUuidName()).append(";");
|
|
242 |
writer.append(merging.getIdInSource()).append(";");
|
|
243 |
writer.append(merging.getNameCache()).append(";");
|
|
244 |
writer.append(merging.getAuthor()).append(";");
|
|
245 |
writer.append(merging.getRank()).append(";");
|
|
248 |
writer.append(Nz(sources.get(uuid))).append(";");
|
|
249 |
writer.append(Nz(merging.getUuidName())).append(";");
|
|
250 |
writer.append(Nz(merging.getIdInSource())).append(";");
|
|
251 |
writer.append(Nz(merging.getNameCache())).append(";");
|
|
252 |
writer.append(Nz(merging.getAuthor())).append(";");
|
|
253 |
writer.append(Nz(merging.getRank())).append(";");
|
|
246 | 254 |
if (merging.isStatus()){ |
247 |
writer.append("accepted").append(";");;
|
|
255 |
writer.append("accepted").append(";"); |
|
248 | 256 |
}else{ |
249 |
writer.append("synonym").append(";");;
|
|
257 |
writer.append("synonym").append(";"); |
|
250 | 258 |
} |
251 |
writer.append(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "").append(";");
|
|
252 |
writer.append(merging.getParentString()).append(";");
|
|
253 |
writer.append(merging.getParentRankString()).append(";");;
|
|
259 |
writer.append(Nz(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "")).append(";");
|
|
260 |
writer.append(Nz(merging.getParentString())).append(";");
|
|
261 |
writer.append(Nz(merging.getParentRankString())).append(";");
|
|
254 | 262 |
} |
255 | 263 |
writer.append('\n'); |
256 | 264 |
} |
257 | 265 |
|
258 |
private List<Map<UUID,PesiMergeObject>> createMergeObjects(Map<String, Map<UUID, Set<TaxonName>>> names,
|
|
266 |
private List<Map<UUID,PesiMergeObject>> createMergeObjects(Map<String, Map<UUID, Set<TaxonName>>> names,
|
|
259 | 267 |
CdmApplicationController appCtr){ |
260 | 268 |
|
261 | 269 |
List<Map<UUID,PesiMergeObject>> merge = new ArrayList<>(); |
... | ... | |
298 | 306 |
//TODO: find the two correct names |
299 | 307 |
logger.warn("Name has not exact 1 but " + taxonBases.size() + " taxon base attached. This is not yet handled. Take arbitrary one."); |
300 | 308 |
} |
301 |
TaxonBase<?> taxonBase = taxonBases.iterator().next(); |
|
302 | 309 |
|
303 | 310 |
//uuid |
304 | 311 |
mergeObject.setUuidName(name.getUuid().toString()); |
... | ... | |
313 | 320 |
mergeObject.setRank(name.getRank().getLabel()); |
314 | 321 |
|
315 | 322 |
//Phylum |
316 |
TaxonNodeDto phylum = null; |
|
317 |
if (name.getRank().equals(Rank.PHYLUM())) { |
|
318 |
Taxon taxon = getAcceptedTaxon(name); |
|
319 |
if (taxon != null) { |
|
320 |
if (taxon.getTaxonNodes().size()>1){ |
|
321 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
322 |
} |
|
323 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
324 |
phylum = new TaxonNodeDto(node); |
|
325 |
} |
|
326 |
|
|
327 |
} |
|
328 |
if (phylum == null && !name.getRank().isHigher(Rank.PHYLUM())){ |
|
329 |
Taxon taxon = getAcceptedTaxon(name); |
|
330 |
if (!taxon.getTaxonNodes().isEmpty()){ |
|
331 |
if (taxon.getTaxonNodes().size()>1){ |
|
332 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
333 |
} |
|
334 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
335 |
phylum = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(node.getClassification(), Rank.PHYLUM(), name); |
|
336 |
} |
|
337 |
} |
|
323 |
TaxonNodeDto phylum = getPhylum(appCtr, name); |
|
338 | 324 |
mergeObject.setPhylum(phylum); |
339 | 325 |
|
340 | 326 |
//idInSource |
... | ... | |
348 | 334 |
|
349 | 335 |
//status and parent |
350 | 336 |
Set<Taxon> taxa = name.getTaxa(); |
337 |
taxa = getReallyAcceptedTaxa(taxa); |
|
351 | 338 |
if (!taxa.isEmpty()){ |
352 | 339 |
mergeObject.setStatus(true); |
353 | 340 |
Iterator<Taxon> taxaIterator = taxa.iterator(); |
... | ... | |
416 | 403 |
merge.add(mergeMap); |
417 | 404 |
} |
418 | 405 |
|
406 |
private TaxonNodeDto getPhylum(CdmApplicationController appCtr, TaxonName name) { |
|
407 |
TaxonNodeDto phylum = null; |
|
408 |
if (name.getRank().equals(Rank.PHYLUM())) { |
|
409 |
Taxon taxon = getAcceptedTaxon(name); |
|
410 |
if (taxon != null) { |
|
411 |
if (taxon.getTaxonNodes().size()>1){ |
|
412 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
413 |
} |
|
414 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
415 |
phylum = new TaxonNodeDto(node); |
|
416 |
} |
|
417 |
|
|
418 |
} |
|
419 |
if (phylum == null && !name.getRank().isHigher(Rank.PHYLUM())){ |
|
420 |
Taxon taxon = getAcceptedTaxon(name); |
|
421 |
if (!taxon.getTaxonNodes().isEmpty()){ |
|
422 |
if (taxon.getTaxonNodes().size()>1){ |
|
423 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
424 |
} |
|
425 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
426 |
phylum = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(node.getClassification(), Rank.PHYLUM(), name); |
|
427 |
} |
|
428 |
} |
|
429 |
return phylum; |
|
430 |
} |
|
431 |
|
|
419 | 432 |
private TaxonNode getAcceptedNode(TaxonName ermsName) { |
433 |
TaxonNode parentNode = null; |
|
420 | 434 |
Set<TaxonBase> taxonBases = ermsName.getTaxonBases(); |
421 |
Taxon taxon = null;
|
|
422 |
if (taxonBases != null && !taxonBases.isEmpty()) {
|
|
435 |
if (!taxonBases.isEmpty()) {
|
|
436 |
Taxon taxon = null;
|
|
423 | 437 |
TaxonBase<?> taxonBase = taxonBases.iterator().next(); |
424 | 438 |
if (taxonBase instanceof Synonym) { |
425 | 439 |
taxon = ((Synonym)taxonBase).getAcceptedTaxon(); |
440 |
}else{ |
|
441 |
taxon = getAccTaxonForTaxonSynonym((Taxon)taxonBase); |
|
442 |
} |
|
443 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
444 |
if (!nodes.isEmpty()) { |
|
445 |
parentNode = nodes.iterator().next(); |
|
426 | 446 |
} |
427 | 447 |
} |
428 | 448 |
|
429 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
430 |
|
|
431 |
TaxonNode parentNode = null; |
|
432 |
if (nodes != null && !nodes.isEmpty()) { |
|
433 |
parentNode = nodes.iterator().next(); |
|
434 |
} |
|
435 | 449 |
return parentNode; |
436 | 450 |
} |
437 | 451 |
|
... | ... | |
440 | 454 |
//prefer accepted taxon |
441 | 455 |
if (name.getTaxa() != null && !name.getTaxa().isEmpty()){ |
442 | 456 |
taxon = name.getTaxa().iterator().next(); |
457 |
taxon = getAccTaxonForTaxonSynonym(taxon); |
|
443 | 458 |
//else take synonym |
444 | 459 |
}else if (name.getTaxonBases() != null && !name.getTaxonBases().isEmpty()){ |
445 | 460 |
TaxonBase<?> taxonBase = name.getTaxonBases().iterator().next(); |
... | ... | |
451 | 466 |
return taxon; |
452 | 467 |
} |
453 | 468 |
|
469 |
private Taxon getAccTaxonForTaxonSynonym(Taxon taxon) { |
|
470 |
if (!taxon.getRelationsFromThisTaxon().isEmpty()){ |
|
471 |
for (TaxonRelationship rel: taxon.getRelationsFromThisTaxon()){ |
|
472 |
UUID uuidType = rel.getType().getUuid(); |
|
473 |
if (uuidType.equals(TaxonRelationshipType.uuidSynonymOfTaxonRelationship) |
|
474 |
|| uuidType.equals(TaxonRelationshipType.uuidHeterotypicSynonymTaxonRelationship) |
|
475 |
|| uuidType.equals(TaxonRelationshipType.uuidHomotypicSynonymTaxonRelationship)){ |
|
476 |
taxon = rel.getToTaxon(); |
|
477 |
} |
|
478 |
} |
|
479 |
} |
|
480 |
return taxon; |
|
481 |
} |
|
482 |
|
|
483 |
/** |
|
484 |
* Filters out the ERMS taxon synonyms |
|
485 |
*/ |
|
486 |
private Set<Taxon> getReallyAcceptedTaxa(Set<Taxon> taxa) { |
|
487 |
Set<Taxon> result = new HashSet<>(); |
|
488 |
for (Taxon taxon : taxa){ |
|
489 |
Taxon accTaxon = getAccTaxonForTaxonSynonym(taxon); |
|
490 |
if(taxon.equals(accTaxon)) { |
|
491 |
result.add(taxon); |
|
492 |
} |
|
493 |
} |
|
494 |
return result; |
|
495 |
} |
|
496 |
|
|
497 |
private CharSequence Nz(String str) { |
|
498 |
return CdmUtils.Nz(str); |
|
499 |
} |
|
500 |
|
|
454 | 501 |
public static void main(String[] args) { |
455 | 502 |
PesiFindIdenticalNamesActivator activator = new PesiFindIdenticalNamesActivator(); |
456 | 503 |
activator.invoke(pesiSource); |
Also available in: Unified diff
ref #1447 handle null and ERMS pseudo taxa and some minor changes