Project

General

Profile

« Previous | Next » 

Revision 87bd976a

Added by Andreas Müller about 4 years ago

ref #1447 further improve findIdenticalNames for PESI

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiFindIdenticalNamesActivator.java
18 18
import java.util.Set;
19 19
import java.util.UUID;
20 20

  
21
import org.apache.commons.lang3.StringUtils;
21 22
import org.apache.log4j.Logger;
22 23
import org.springframework.transaction.TransactionStatus;
23 24

  
......
41 42
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
42 43
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
43 44

  
45
/**
46
 * Finds taxa with identical {@link TaxonName#getNameCache() name cache} but from different
47
 * sources (import source) and writes them into multiple csv files.
48
 * All cases are stored in file xxx_namesAll, some prefiltered files are created for e.g.
49
 * those having different parents or different authors.
50
 * Taxa are compared pairwise. If a name appears in 3 sources, then for each of the 3 pairs 1 record
51
 * is created, one below the other. Also, if a name appears multiple times (e.g. homonyms) in 1
52
 * DB and 1 time in another, each of the multiple names is compared to the other database's
53
 * record.
54
 * <BR><BR>
55
 *
56
 * TODO is it necessary to create these extra files? Filters can also be applied in Excel.
57
 *
58
 * @author a.mueller
59
 * @since 22.01.2020
60
 */
44 61
public class PesiFindIdenticalNamesActivator {
45 62

  
46 63
    private static final Logger logger = Logger.getLogger(PesiFindIdenticalNamesActivator.class);
......
219 236
        }
220 237
    }
221 238

  
222
    //old method when all sources were in 1 line
223
    private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method)
224
            throws IllegalAccessException, IllegalArgumentException, InvocationTargetException {
225

  
226
        if (method == null){
227
            return true;
228
        }
229
        Object value = null;
230
        boolean isFirst = true;
231
        for (UUID sourceUuid: merging.keySet()){
232
            if (isFirst){
233
                value = method.invoke(merging.get(sourceUuid));
234
                isFirst = false;
235
            }else{
236
                Object newValue = method.invoke(merging.get(sourceUuid));
237
                if (!CdmUtils.nullSafeEqual(newValue, value)){
238
                    return true;
239
                }
240
            }
241
        }
242
        return false;
243
    }
244

  
245 239
	private void createHeader(Writer writer, String firstLine){
246 240
	 	try {
247 241
            writer.append(firstLine);
248 242
            writer.append('\n');
249
            writeHeaderPair(writer, "taxon uuid");
250
            writeHeaderPair(writer, "taxon id");
251
            writer.append("next name cache").append(";");
252
            writer.append("diff").append(";");
253
            writeHeaderPair(writer, "source");
254
            writeHeaderPair(writer, "name uuid");
255
            writeHeaderPair(writer, "idInSource");
256
            writeHeaderPair(writer, "nameCache");
243
            writeHeaderPair(writer, "tid");
244
            writer.append("use;");
245
            writer.append("nameUse;");
246
            writer.append("next;");
247
            writer.append("diff;");
248
            writeHeaderPair(writer, "src");
249
//            writeHeaderPair(writer, "nuuid");
250
//            writeHeaderPair(writer, "idInSource");
251
            writer.append("nameCache;");
257 252
            writeHeaderPair(writer, "author");
253
            writeHeaderPair(writer, "nom.ref.");
258 254
            writeHeaderPair(writer, "rank");
259 255
            writeHeaderPair(writer, "kingdom");
260 256
            writeHeaderPair(writer, "phylum");
......
262 258
            writeHeaderPair(writer, "parentString");
263 259
            writeHeaderPair(writer, "parentRankString");
264 260
            writeHeaderPair(writer, "status");
261
            writeHeaderPair(writer, "tuuid");
262

  
265 263
            writer.append('\n');
266 264
        } catch (IOException e) {
267 265
            e.printStackTrace();
......
277 275
           PesiMergeObject merge1, PesiMergeObject merge2,
278 276
           Method method, boolean isNextNameCache){
279 277

  
280
        writePair(writer, merge1, merge2, "UuidTaxon");
281
        writePair(writer, merge1, merge2, "IdTaxon");
278
        writePair(writer, merge1, merge2, "IdTaxon", Compare.NO);
279
        writeSingleValue(writer, "");
280
        writeSingleValue(writer, "");
282 281
        writeSingleValue(writer, isNextNameCache?"1":"0");
283 282
        boolean different = isDifferent(merge1,  merge2, method);
284 283
        writeSingleValue(writer, different?"1":"0");
285 284
        writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge1.getUuidSource())));
286 285
        writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge2.getUuidSource())));
287
        writePair(writer, merge1, merge2, "UuidName");
288
        writePair(writer, merge1, merge2, "IdInSource");
289
        writePair(writer, merge1, merge2, "NameCache");
290
        writePair(writer, merge1, merge2, "Author");
291
        writePair(writer, merge1, merge2, "Rank");
292
        writePairNode(writer, merge1, merge2, "Kingdom");
293
        writePairNode(writer, merge1, merge2, "Phylum");
294
        writePairNode(writer, merge1, merge2, "Family");
295
        writePair(writer, merge1, merge2, "ParentString");
296
        writePair(writer, merge1, merge2, "ParentRankString");
297
        writeSingleValue(writer, merge1.isStatus()?"accepted":"synonym");
298
        writeSingleValue(writer, merge2.isStatus()?"accepted":"synonym");
286
//        writePair(writer, merge1, merge2, "UuidName");
287
//        writePair(writer, merge1, merge2, "IdInSource");
288
        writeSingleValue(writer, merge1.getNameCache());
289
//        writePair(writer, merge1, merge2, "NameCache");
290
        writePair(writer, merge1, merge2, "Author", Compare.YES);
291
        writePair(writer, merge1, merge2, "NomenclaturalReference", Compare.YES);
292
        writePair(writer, merge1, merge2, "Rank", Compare.YES);
293
        writePair(writer, merge1, merge2, "KingdomCache", Compare.KEEP_FIRST);
294
        writePair(writer, merge1, merge2, "PhylumCache", Compare.YES);
295
        writePair(writer, merge1, merge2, "FamilyCache", Compare.YES);
296
        writePair(writer, merge1, merge2, "ParentString", Compare.YES);
297
        writePair(writer, merge1, merge2, "ParentRankString", Compare.YES);
298
        writePair(writer, merge1, merge2, "StatusStr", Compare.YES);
299
        writePair(writer, merge1, merge2, "UuidTaxon", Compare.YES);
300

  
299 301
        try {
300 302
            writer.append('\n');
301 303
        } catch (IOException e) {
......
311 313
        }
312 314
    }
313 315

  
314
    private void writePairNode(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName) {
315
        try {
316
            Method method = PesiMergeObject.class.getDeclaredMethod("get"+methodName);
317
            TaxonNodeDto value = (TaxonNodeDto) method.invoke(merge1);
318
            writer.append(value==null?"":value.getTitleCache()).append(";");
319
            value = (TaxonNodeDto) method.invoke(merge2);
320
            writer.append(value==null?"":value.getTitleCache()).append(";");
321
        } catch (Exception e) {
322
            e.printStackTrace();
316
    private enum Compare{
317
        NO,
318
        YES,
319
        KEEP_FIRST;
320

  
321
        boolean isAnyCompare(){
322
            return this == NO;
323 323
        }
324 324
    }
325 325

  
326
    private void writePair(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName) {
326
    private void writePair(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName, Compare compare) {
327 327
        try {
328 328
            Method method = PesiMergeObject.class.getDeclaredMethod("get"+methodName);
329 329
            String value1 = (String) method.invoke(merge1);
330
            writer.append(normalize(value1)).append(";");
331 330
            String value2 = (String) method.invoke(merge2);
331
            if (compare.isAnyCompare() && CdmUtils.nullSafeEqual(value1, value2)){
332
                value2 = StringUtils.isBlank(value2)? "":"-";
333
                if (compare == Compare.YES){
334
                    value1 = value2;
335
                }
336
            }
337
            writer.append(normalize(value1)).append(";");
332 338
            writer.append(normalize(value2)).append(";");
333 339
        } catch (Exception e) {
334 340
            e.printStackTrace();
......
339 345
        return CdmUtils.Nz(val).replace(";", "@");
340 346
    }
341 347

  
342
    private void writeCsvLine(Writer writer, Map<UUID,PesiMergeObject> mergeObjects, Map<UUID,String> sources) throws IOException{
343

  
344
        for (UUID uuid : sourceRefUuids){
345
	        PesiMergeObject merging = mergeObjects.get(uuid);
346
	        if(merging == null){
347
	            continue;
348
	        }
349
	        writer.append(Nz(sources.get(uuid))).append(";");
350
            writer.append(Nz(merging.getUuidName())).append(";");
351
	        writer.append(Nz(merging.getIdInSource())).append(";");
352
	        writer.append(Nz(merging.getNameCache())).append(";");
353
	        writer.append(Nz(merging.getAuthor())).append(";");
354
	        writer.append(Nz(merging.getRank())).append(";");
355
	        if (merging.isStatus()){
356
	            writer.append("accepted").append(";");
357
	        }else{
358
	            writer.append("synonym").append(";");
359
	        }
360
	        writer.append(Nz(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "")).append(";");
361
	        writer.append(Nz(merging.getParentString())).append(";");
362
	        writer.append(Nz(merging.getParentRankString())).append(";");
363
	    }
364
        writer.append('\n');
365
	}
366

  
367 348
    private List<Map<UUID,List<PesiMergeObject>>> createMergeObjects(
368 349
            Map<String, Map<UUID, Set<TaxonName>>> identicalNames,
369 350
	        CdmApplicationController appCtr){
......
416 397
                    //authorship
417 398
                    mergeObject.setAuthor(name.getAuthorshipCache());
418 399

  
400
                    //nom.ref.
401
                    mergeObject.setNomenclaturalReference(name.getNomenclaturalReference()== null?null: name.getNomenclaturalReference().getAbbrevTitleCache());
402

  
419 403
                    //rank
420 404
                    mergeObject.setRank(name.getRank().getLabel());
421 405

  
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiMergeObject.java
18 18

  
19 19
	private String nameCache;
20 20

  
21
	private boolean status;
21
	private boolean isStatus;
22 22

  
23 23
	private String author;
24 24

  
25 25
	private String rank;
26 26

  
27
	private String nomenclaturalReference;
28

  
27 29
	private TaxonNodeDto phylum;
28 30

  
29 31
	private TaxonNodeDto kingdom;
......
74 76
		this.rank = rank;
75 77
	}
76 78

  
77
	public TaxonNodeDto getPhylum() {
78
		return phylum;
79
	}
80
	public void setPhylum(TaxonNodeDto phylum) {
81
		this.phylum = phylum;
82
	}
83

  
84 79
	public boolean isStatus() {
85
		return status;
80
		return isStatus;
86 81
	}
82
    public String getStatusStr() {
83
        return isStatus? "accepted":"synonym";
84
    }
87 85
	public void setStatus(boolean status) {
88
		this.status = status;
86
		this.isStatus = status;
89 87
	}
90 88

  
91 89
	public String getAuthor() {
......
112 110
    public TaxonNodeDto getKingdom() {
113 111
        return kingdom;
114 112
    }
113
    public String getKingdomCache() {
114
        return kingdom == null? null : kingdom.getNameCache();
115
    }
115 116
    public void setKingdom(TaxonNodeDto kingdom) {
116 117
        this.kingdom = kingdom;
117 118
    }
118 119

  
120
    public TaxonNodeDto getPhylum() {
121
        return phylum;
122
    }
123
    public String getPhylumCache() {
124
        return phylum == null? null : phylum.getNameCache();
125
    }
126
    public void setPhylum(TaxonNodeDto phylum) {
127
        this.phylum = phylum;
128
    }
129

  
119 130
    public TaxonNodeDto getFamily() {
120 131
        return family;
121 132
    }
133
    public String getFamilyCache() {
134
        return family == null? null : family.getNameCache();
135
    }
122 136
    public void setFamily(TaxonNodeDto family) {
123 137
        this.family = family;
124 138
    }
......
146 160
    public void setUuidSource(String uuidSource) {
147 161
        this.uuidSource = uuidSource;
148 162
    }
163

  
164
    public String getNomenclaturalReference() {
165
        return nomenclaturalReference;
166
    }
167
    public void setNomenclaturalReference(String nomenclaturalReference) {
168
        this.nomenclaturalReference = nomenclaturalReference;
169
    }
149 170
}

Also available in: Unified diff