18 |
18 |
import java.util.Set;
|
19 |
19 |
import java.util.UUID;
|
20 |
20 |
|
|
21 |
import org.apache.commons.lang3.StringUtils;
|
21 |
22 |
import org.apache.log4j.Logger;
|
22 |
23 |
import org.springframework.transaction.TransactionStatus;
|
23 |
24 |
|
... | ... | |
41 |
42 |
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
|
42 |
43 |
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
|
43 |
44 |
|
|
45 |
/**
|
|
46 |
* Finds taxa with identical {@link TaxonName#getNameCache() name cache} but from different
|
|
47 |
* sources (import source) and writes them into multiple csv files.
|
|
48 |
* All cases are stored in file xxx_namesAll, some prefiltered files are created for e.g.
|
|
49 |
* those having different parents or different authors.
|
|
50 |
* Taxa are pairwise compared. If a name appears in 3 sources, for each of the 3 pairs 1 record
|
|
51 |
* is created below each other. Also if a name appears multiple times (e.g. homonyms) in 1
|
|
52 |
* DB and 1 time in another, each of the multiple names is compared to the other database's
|
|
53 |
* record.
|
|
54 |
* <BR><BR>
|
|
55 |
*
|
|
56 |
* TODO is it necessary to create these extra files? Filters can also be applied in Excel.
|
|
57 |
*
|
|
58 |
* @author a.mueller
|
|
59 |
* @since 22.01.2020
|
|
60 |
*/
|
44 |
61 |
public class PesiFindIdenticalNamesActivator {
|
45 |
62 |
|
46 |
63 |
private static final Logger logger = Logger.getLogger(PesiFindIdenticalNamesActivator.class);
|
... | ... | |
219 |
236 |
}
|
220 |
237 |
}
|
221 |
238 |
|
222 |
|
//old method when all sources were in 1 line
|
223 |
|
private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method)
|
224 |
|
throws IllegalAccessException, IllegalArgumentException, InvocationTargetException {
|
225 |
|
|
226 |
|
if (method == null){
|
227 |
|
return true;
|
228 |
|
}
|
229 |
|
Object value = null;
|
230 |
|
boolean isFirst = true;
|
231 |
|
for (UUID sourceUuid: merging.keySet()){
|
232 |
|
if (isFirst){
|
233 |
|
value = method.invoke(merging.get(sourceUuid));
|
234 |
|
isFirst = false;
|
235 |
|
}else{
|
236 |
|
Object newValue = method.invoke(merging.get(sourceUuid));
|
237 |
|
if (!CdmUtils.nullSafeEqual(newValue, value)){
|
238 |
|
return true;
|
239 |
|
}
|
240 |
|
}
|
241 |
|
}
|
242 |
|
return false;
|
243 |
|
}
|
244 |
|
|
245 |
239 |
private void createHeader(Writer writer, String firstLine){
|
246 |
240 |
try {
|
247 |
241 |
writer.append(firstLine);
|
248 |
242 |
writer.append('\n');
|
249 |
|
writeHeaderPair(writer, "taxon uuid");
|
250 |
|
writeHeaderPair(writer, "taxon id");
|
251 |
|
writer.append("next name cache").append(";");
|
252 |
|
writer.append("diff").append(";");
|
253 |
|
writeHeaderPair(writer, "source");
|
254 |
|
writeHeaderPair(writer, "name uuid");
|
255 |
|
writeHeaderPair(writer, "idInSource");
|
256 |
|
writeHeaderPair(writer, "nameCache");
|
|
243 |
writeHeaderPair(writer, "tid");
|
|
244 |
writer.append("use;");
|
|
245 |
writer.append("nameUse;");
|
|
246 |
writer.append("next;");
|
|
247 |
writer.append("diff;");
|
|
248 |
writeHeaderPair(writer, "src");
|
|
249 |
// writeHeaderPair(writer, "nuuid");
|
|
250 |
// writeHeaderPair(writer, "idInSource");
|
|
251 |
writer.append("nameCache;");
|
257 |
252 |
writeHeaderPair(writer, "author");
|
|
253 |
writeHeaderPair(writer, "nom.ref.");
|
258 |
254 |
writeHeaderPair(writer, "rank");
|
259 |
255 |
writeHeaderPair(writer, "kingdom");
|
260 |
256 |
writeHeaderPair(writer, "phylum");
|
... | ... | |
262 |
258 |
writeHeaderPair(writer, "parentString");
|
263 |
259 |
writeHeaderPair(writer, "parentRankString");
|
264 |
260 |
writeHeaderPair(writer, "status");
|
|
261 |
writeHeaderPair(writer, "tuuid");
|
|
262 |
|
265 |
263 |
writer.append('\n');
|
266 |
264 |
} catch (IOException e) {
|
267 |
265 |
e.printStackTrace();
|
... | ... | |
277 |
275 |
PesiMergeObject merge1, PesiMergeObject merge2,
|
278 |
276 |
Method method, boolean isNextNameCache){
|
279 |
277 |
|
280 |
|
writePair(writer, merge1, merge2, "UuidTaxon");
|
281 |
|
writePair(writer, merge1, merge2, "IdTaxon");
|
|
278 |
writePair(writer, merge1, merge2, "IdTaxon", Compare.NO);
|
|
279 |
writeSingleValue(writer, "");
|
|
280 |
writeSingleValue(writer, "");
|
282 |
281 |
writeSingleValue(writer, isNextNameCache?"1":"0");
|
283 |
282 |
boolean different = isDifferent(merge1, merge2, method);
|
284 |
283 |
writeSingleValue(writer, different?"1":"0");
|
285 |
284 |
writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge1.getUuidSource())));
|
286 |
285 |
writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge2.getUuidSource())));
|
287 |
|
writePair(writer, merge1, merge2, "UuidName");
|
288 |
|
writePair(writer, merge1, merge2, "IdInSource");
|
289 |
|
writePair(writer, merge1, merge2, "NameCache");
|
290 |
|
writePair(writer, merge1, merge2, "Author");
|
291 |
|
writePair(writer, merge1, merge2, "Rank");
|
292 |
|
writePairNode(writer, merge1, merge2, "Kingdom");
|
293 |
|
writePairNode(writer, merge1, merge2, "Phylum");
|
294 |
|
writePairNode(writer, merge1, merge2, "Family");
|
295 |
|
writePair(writer, merge1, merge2, "ParentString");
|
296 |
|
writePair(writer, merge1, merge2, "ParentRankString");
|
297 |
|
writeSingleValue(writer, merge1.isStatus()?"accepted":"synonym");
|
298 |
|
writeSingleValue(writer, merge2.isStatus()?"accepted":"synonym");
|
|
286 |
// writePair(writer, merge1, merge2, "UuidName");
|
|
287 |
// writePair(writer, merge1, merge2, "IdInSource");
|
|
288 |
writeSingleValue(writer, merge1.getNameCache());
|
|
289 |
// writePair(writer, merge1, merge2, "NameCache");
|
|
290 |
writePair(writer, merge1, merge2, "Author", Compare.YES);
|
|
291 |
writePair(writer, merge1, merge2, "NomenclaturalReference", Compare.YES);
|
|
292 |
writePair(writer, merge1, merge2, "Rank", Compare.YES);
|
|
293 |
writePair(writer, merge1, merge2, "KingdomCache", Compare.KEEP_FIRST);
|
|
294 |
writePair(writer, merge1, merge2, "PhylumCache", Compare.YES);
|
|
295 |
writePair(writer, merge1, merge2, "FamilyCache", Compare.YES);
|
|
296 |
writePair(writer, merge1, merge2, "ParentString", Compare.YES);
|
|
297 |
writePair(writer, merge1, merge2, "ParentRankString", Compare.YES);
|
|
298 |
writePair(writer, merge1, merge2, "StatusStr", Compare.YES);
|
|
299 |
writePair(writer, merge1, merge2, "UuidTaxon", Compare.YES);
|
|
300 |
|
299 |
301 |
try {
|
300 |
302 |
writer.append('\n');
|
301 |
303 |
} catch (IOException e) {
|
... | ... | |
311 |
313 |
}
|
312 |
314 |
}
|
313 |
315 |
|
314 |
|
private void writePairNode(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName) {
|
315 |
|
try {
|
316 |
|
Method method = PesiMergeObject.class.getDeclaredMethod("get"+methodName);
|
317 |
|
TaxonNodeDto value = (TaxonNodeDto) method.invoke(merge1);
|
318 |
|
writer.append(value==null?"":value.getTitleCache()).append(";");
|
319 |
|
value = (TaxonNodeDto) method.invoke(merge2);
|
320 |
|
writer.append(value==null?"":value.getTitleCache()).append(";");
|
321 |
|
} catch (Exception e) {
|
322 |
|
e.printStackTrace();
|
|
316 |
private enum Compare{
|
|
317 |
NO,
|
|
318 |
YES,
|
|
319 |
KEEP_FIRST;
|
|
320 |
|
|
321 |
boolean isAnyCompare(){
|
|
322 |
return this == NO;
|
323 |
323 |
}
|
324 |
324 |
}
|
325 |
325 |
|
326 |
|
private void writePair(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName) {
|
|
326 |
private void writePair(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName, Compare compare) {
|
327 |
327 |
try {
|
328 |
328 |
Method method = PesiMergeObject.class.getDeclaredMethod("get"+methodName);
|
329 |
329 |
String value1 = (String) method.invoke(merge1);
|
330 |
|
writer.append(normalize(value1)).append(";");
|
331 |
330 |
String value2 = (String) method.invoke(merge2);
|
|
331 |
if (compare.isAnyCompare() && CdmUtils.nullSafeEqual(value1, value2)){
|
|
332 |
value2 = StringUtils.isBlank(value2)? "":"-";
|
|
333 |
if (compare == Compare.YES){
|
|
334 |
value1 = value2;
|
|
335 |
}
|
|
336 |
}
|
|
337 |
writer.append(normalize(value1)).append(";");
|
332 |
338 |
writer.append(normalize(value2)).append(";");
|
333 |
339 |
} catch (Exception e) {
|
334 |
340 |
e.printStackTrace();
|
... | ... | |
339 |
345 |
return CdmUtils.Nz(val).replace(";", "@");
|
340 |
346 |
}
|
341 |
347 |
|
342 |
|
private void writeCsvLine(Writer writer, Map<UUID,PesiMergeObject> mergeObjects, Map<UUID,String> sources) throws IOException{
|
343 |
|
|
344 |
|
for (UUID uuid : sourceRefUuids){
|
345 |
|
PesiMergeObject merging = mergeObjects.get(uuid);
|
346 |
|
if(merging == null){
|
347 |
|
continue;
|
348 |
|
}
|
349 |
|
writer.append(Nz(sources.get(uuid))).append(";");
|
350 |
|
writer.append(Nz(merging.getUuidName())).append(";");
|
351 |
|
writer.append(Nz(merging.getIdInSource())).append(";");
|
352 |
|
writer.append(Nz(merging.getNameCache())).append(";");
|
353 |
|
writer.append(Nz(merging.getAuthor())).append(";");
|
354 |
|
writer.append(Nz(merging.getRank())).append(";");
|
355 |
|
if (merging.isStatus()){
|
356 |
|
writer.append("accepted").append(";");
|
357 |
|
}else{
|
358 |
|
writer.append("synonym").append(";");
|
359 |
|
}
|
360 |
|
writer.append(Nz(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "")).append(";");
|
361 |
|
writer.append(Nz(merging.getParentString())).append(";");
|
362 |
|
writer.append(Nz(merging.getParentRankString())).append(";");
|
363 |
|
}
|
364 |
|
writer.append('\n');
|
365 |
|
}
|
366 |
|
|
367 |
348 |
private List<Map<UUID,List<PesiMergeObject>>> createMergeObjects(
|
368 |
349 |
Map<String, Map<UUID, Set<TaxonName>>> identicalNames,
|
369 |
350 |
CdmApplicationController appCtr){
|
... | ... | |
416 |
397 |
//authorship
|
417 |
398 |
mergeObject.setAuthor(name.getAuthorshipCache());
|
418 |
399 |
|
|
400 |
//nom.ref.
|
|
401 |
mergeObject.setNomenclaturalReference(name.getNomenclaturalReference()== null?null: name.getNomenclaturalReference().getAbbrevTitleCache());
|
|
402 |
|
419 |
403 |
//rank
|
420 |
404 |
mergeObject.setRank(name.getRank().getLabel());
|
421 |
405 |
|
ref #1447 further improve findIdenticalNames for PESI