Project

General

Profile

Download (20.4 KB) Statistics
| Branch: | Revision:
1
package eu.etaxonomy.cdm.app.pesi.merging;
2

    
3
import java.io.File;
4
import java.io.FileOutputStream;
5
import java.io.IOException;
6
import java.io.OutputStreamWriter;
7
import java.io.Writer;
8
import java.lang.reflect.InvocationTargetException;
9
import java.lang.reflect.Method;
10
import java.nio.charset.StandardCharsets;
11
import java.util.ArrayList;
12
import java.util.Arrays;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.Iterator;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19
import java.util.UUID;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.transaction.TransactionStatus;
23

    
24
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
25
import eu.etaxonomy.cdm.app.common.CdmDestinations;
26
import eu.etaxonomy.cdm.common.CdmUtils;
27
import eu.etaxonomy.cdm.common.StringComparator;
28
import eu.etaxonomy.cdm.database.DbSchemaValidation;
29
import eu.etaxonomy.cdm.database.ICdmDataSource;
30
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
31
import eu.etaxonomy.cdm.io.pesi.merging.PesiMergeObject;
32
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
35
import eu.etaxonomy.cdm.model.name.Rank;
36
import eu.etaxonomy.cdm.model.name.TaxonName;
37
import eu.etaxonomy.cdm.model.taxon.Synonym;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
42
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
43
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
44

    
45
public class PesiFindIdenticalNamesActivator {
46

    
47
    private static final Logger logger = Logger.getLogger(PesiFindIdenticalNamesActivator.class);

    //alternative data sources, kept for switching manually:
    //static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2();
    //static final ICdmDataSource pesiSource = CdmDestinations.cdm_test_local_faunaEu_mysql();
    /** Data source handed to {@code invoke} by {@code main}. */
    static final ICdmDataSource pesiSource = CdmDestinations.cdm_pesi2019_final();

    /**
     * Common prefix (directory plus file name start) of all csv files written by this class;
     * a report specific suffix such as "_names.csv" is appended to it.
     */
    static final String path = System.getProperty("user.home")+File.separator+".cdmLibrary"+File.separator+"pesi"+File.separator+"pesimerge";

    //uuids of the source references; never reassigned, therefore final
    private static final UUID faunaEuSourceUuid = PesiTransformer.uuidSourceRefFaunaEuropaea;
    private static final UUID ermsSourceUuid = PesiTransformer.uuidSourceRefErms;
    private static final UUID ifSourceUuid = PesiTransformer.uuidSourceRefIndexFungorum;
    private static final UUID emSourceUuid = PesiTransformer.uuidSourceRefEuroMed;

    /** Source reference uuids; the csv writer emits the column groups of a line in this order. */
    private static final List<UUID> sourceRefUuids = new ArrayList<>();

    /** Short label per source reference uuid, written into the "source" column. */
    private static final Map<UUID,String> sources = new HashMap<>();

    static {
        sourceRefUuids.addAll(Arrays.asList(emSourceUuid, ermsSourceUuid, faunaEuSourceUuid, ifSourceUuid));
        sources.put(emSourceUuid, "E+M");
        sources.put(ermsSourceUuid, "ERMS");
        sources.put(faunaEuSourceUuid, "FauEu");
        sources.put(ifSourceUuid, "IF");
    }
69

    
70

    
71
	private void invoke(ICdmDataSource source){
72

    
73
        CdmApplicationController app = CdmIoApplicationController.NewInstance(source, DbSchemaValidation.VALIDATE, false);
74

    
75
        List<String> propertyPaths = new ArrayList<>();
76
        propertyPaths.add("sources.*");
77
        propertyPaths.add("sources.idInSource");
78
        propertyPaths.add("sources.idNamespace");
79
        propertyPaths.add("taxonBases.*");
80
        propertyPaths.add("taxonBases.relationsFromThisTaxon");
81
        propertyPaths.add("taxonBases.taxonNodes.*");
82
        propertyPaths.add("taxonBases.taxonNodes.parent.*");
83
        propertyPaths.add("taxonBases.taxonNodes.childNodes.*");
84
        propertyPaths.add("taxonBases.taxonNodes.childNodes.classification.rootNode.childNodes.*");
85
        propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*");
86
        propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.*");
87
        propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.*");
88
        propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.classification.rootNode.childNodes.*");
89
        System.out.println("Start getIdenticalNames...");
90

    
91
        Map<String, Map<UUID, Set<TaxonName>>> namesOfIdenticalTaxa;
92
        TransactionStatus tx = app.startTransaction(true);
93
        try {
94
            namesOfIdenticalTaxa = app.getTaxonService().findIdenticalTaxonNames(sourceRefUuids, propertyPaths);
95
        } catch (Exception e) {
96
            e.printStackTrace();
97
            return;
98
        }
99
        System.out.println("Start creating merging objects");
100
        List<Map<UUID, PesiMergeObject>> mergingObjects = createMergeObjects(namesOfIdenticalTaxa, app);
101
        app.commitTransaction(tx);
102

    
103
        boolean resultOK = true;
104
        System.out.println("Start creating csv files");
105
        resultOK &= writeSameNamesDifferentAuthorToCsv(mergingObjects, sources, path + "_authors.csv");
106
        resultOK &= writeSameNamesDifferentStatusToCsv(mergingObjects, sources, path + "_status.csv");
107
        resultOK &= writeSameNamesToCsvFile(mergingObjects, sources, path + "_names.csv");
108
        resultOK &= writeSameNamesDifferentPhylumToCsv(mergingObjects, sources, path + "_phylum.csv");
109
        resultOK &= writeSameNamesDifferentParentToCsv(mergingObjects, sources, path + "_parent.csv");
110
        resultOK &= writeSameNamesDifferentRankToCsv(mergingObjects, sources, path + "_rank.csv");
111

    
112
        System.out.println("End find identical names for PESI: " + resultOK + ". Results written to " + path);
113
	}
114

    
115
	private boolean writeSameNamesToCsvFile(
116
			List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName) {
117

    
118
	    String header = "same names (all)";
119
        String methodName = null;
120
        return writeDifference(header, methodName, mergingObjects, sources, sFileName);
121
	}
122

    
123
	private boolean writeSameNamesDifferentPhylumToCsv(
124
	        List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){
125

    
126
	    String header = "same names but different phylum";
127
	    String methodName = "getPhylum";
128
	    return writeDifference(header, methodName, mergingObjects, sources, sFileName);
129
	}
130

    
131
    private boolean writeSameNamesDifferentParentToCsv(
132
	        List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){
133

    
134
		    String header = "same names but different parent";
135
	        String methodName = "getParentString";
136
	        return writeDifference(header, methodName, mergingObjects, sources, sFileName);
137
	}
138

    
139
	private boolean writeSameNamesDifferentRankToCsv(
140
	        List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){
141

    
142
        String header = "same names but different rank";
143
        String methodName = "getRank";
144
        return writeDifference(header, methodName, mergingObjects, sources, sFileName);
145
	}
146

    
147
    private boolean writeSameNamesDifferentStatusToCsv(
148
            List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){
149

    
150
        String header = "same names but different status";
151
        String methodName = "isStatus";
152
        return writeDifference(header, methodName, mergingObjects, sources, sFileName);
153
    }
154

    
155
    private boolean writeSameNamesDifferentAuthorToCsv(
156
            List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){
157

    
158
        String header = "same names but different author";
159
        String methodName = "getAuthor";
160
        return writeDifference(header, methodName, mergingObjects, sources, sFileName);
161
    }
162

    
163
    private boolean writeDifference(String header, String methodName,
164
            List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName) {
165

    
166
        try{
167
            Method method = methodName == null? null : PesiMergeObject.class.getMethod(methodName);
168

    
169
//            FileWriter writer = new FileWriter(sFileName);
170
            Writer writer = new OutputStreamWriter(new FileOutputStream(new File(sFileName)), StandardCharsets.UTF_8);
171

    
172
            //create Header
173
            createHeader(writer, header);
174

    
175
            //write data
176
            for (Map<UUID, PesiMergeObject> merging : mergingObjects){
177
                if (isDifferent(merging, method)){
178
                    writeCsvLine(writer, merging, sources) ;
179
                }
180
            }
181
            writer.flush();
182
            writer.close();
183
            return true;
184
        }catch(IOException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e){
185
            logger.error(e.getMessage());
186
            return false;
187
        }
188
    }
189

    
190
    private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method)
191
            throws IllegalAccessException, IllegalArgumentException, InvocationTargetException {
192

    
193
        if (method == null){
194
            return true;
195
        }
196
        Object value = null;
197
        boolean isFirst = true;
198
        for (UUID sourceUuid: merging.keySet()){
199
            if (isFirst){
200
                value = method.invoke(merging.get(sourceUuid));
201
                isFirst = false;
202
            }else{
203
                Object newValue = method.invoke(merging.get(sourceUuid));
204
                if (!CdmUtils.nullSafeEqual(newValue, value)){
205
                    return true;
206
                }
207
            }
208
        }
209
        return false;
210
    }
211

    
212
	private void createHeader(Writer writer, String firstLine) throws IOException{
213
		 	writer.append(firstLine);
214
		    writer.append('\n');
215

    
216
		    for (int i=1; i<=2; i++){
217
		        writer.append("source"+i);
218
                writer.append(';');
219
                writer.append("name uuid"+i);
220
		        writer.append(';');
221
		        writer.append("name id"+i);
222
		        writer.append(';');
223
		        writer.append("name"+i);
224
		        writer.append(';');
225
		        writer.append("author"+i);
226
		        writer.append(';');
227
		        writer.append("rank"+i);
228
		        writer.append(';');
229
		        writer.append("status"+i);
230
		        writer.append(';');
231
		        writer.append("phylum"+i);
232
		        writer.append(';');
233
		        writer.append("parent"+i);
234
		        writer.append(';');
235
		        writer.append("parent rank"+i);
236
		        writer.append(';');
237
		    }
238
			writer.append('\n');
239
	}
240

    
241
	private void writeCsvLine(Writer writer, Map<UUID,PesiMergeObject> mergeObjects, Map<UUID,String> sources) throws IOException{
242

    
243
        for (UUID uuid : sourceRefUuids){
244
	        PesiMergeObject merging = mergeObjects.get(uuid);
245
	        if(merging == null){
246
	            continue;
247
	        }
248
	        writer.append(Nz(sources.get(uuid))).append(";");
249
            writer.append(Nz(merging.getUuidName())).append(";");
250
	        writer.append(Nz(merging.getIdInSource())).append(";");
251
	        writer.append(Nz(merging.getNameCache())).append(";");
252
	        writer.append(Nz(merging.getAuthor())).append(";");
253
	        writer.append(Nz(merging.getRank())).append(";");
254
	        if (merging.isStatus()){
255
	            writer.append("accepted").append(";");
256
	        }else{
257
	            writer.append("synonym").append(";");
258
	        }
259
	        writer.append(Nz(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "")).append(";");
260
	        writer.append(Nz(merging.getParentString())).append(";");
261
	        writer.append(Nz(merging.getParentRankString())).append(";");
262
	    }
263
        writer.append('\n');
264
	}
265

    
266
    private List<Map<UUID,PesiMergeObject>> createMergeObjects(Map<String, Map<UUID, Set<TaxonName>>> names,
267
	        CdmApplicationController appCtr){
268

    
269
		List<Map<UUID,PesiMergeObject>> merge = new ArrayList<>();
270

    
271
		List<String> nameCaches = new ArrayList<>(names.keySet());
272
		nameCaches.sort(StringComparator.Instance);
273
		for (String nameCache: nameCaches){
274
		    createSingleMergeObject(appCtr, merge, names.get(nameCache));
275
		}
276

    
277
		return merge;
278
	}
279

    
280

    
281
    private void createSingleMergeObject(CdmApplicationController appCtr, List<Map<UUID,PesiMergeObject>> merge,
282
            Map<UUID, Set<TaxonName>> identicalNames) {
283

    
284
        Map<UUID,PesiMergeObject> mergeMap = new HashMap<>();
285

    
286
        for (UUID sourceUuid : identicalNames.keySet()){
287
            Set<TaxonName> names = identicalNames.get(sourceUuid);
288
            if (names.isEmpty()){
289
                continue;
290
            }
291
            TaxonName name = names.iterator().next();
292
            String nameAndIdStr = name.getTitleCache() +  "; id = " + name.getId();
293
            if (names.size()>1){
294
                logger.warn("Multiple names per source not yet handled. Take arbitrary one: " + nameAndIdStr);
295
            }
296

    
297
            PesiMergeObject mergeObject = new PesiMergeObject();
298
            mergeMap.put(sourceUuid, mergeObject);
299

    
300
            Set<TaxonBase> taxonBases = name.getTaxonBases();
301
            if (taxonBases.isEmpty()){
302
                logger.warn("No taxonbase attached to name. This is not yet handled: " + nameAndIdStr);
303
                continue;
304
            }
305
            if (taxonBases.size() > 1) {
306
                //TODO: find the two correct names
307
                logger.warn("Name has not exact 1 but " + taxonBases.size() + " taxon base attached. This is not yet handled. Take arbitrary one.");
308
            }
309

    
310
            //uuid
311
            mergeObject.setUuidName(name.getUuid().toString());
312

    
313
            //nameCache
314
            mergeObject.setNameCache(name.getNameCache());
315

    
316
            //authorship
317
            mergeObject.setAuthor(name.getAuthorshipCache());
318

    
319
            //rank
320
            mergeObject.setRank(name.getRank().getLabel());
321

    
322
            //Phylum
323
            TaxonNodeDto phylum = getPhylum(appCtr, name);
324
            mergeObject.setPhylum(phylum);
325

    
326
            //idInSource
327
            Iterator<IdentifiableSource> sources = name.getSources().iterator();
328
            //TODO idInSource - what if multiple sources exist?
329
            if (sources.hasNext()){
330
                IdentifiableSource source = sources.next();
331
                String idInSource = source.getIdInSource();
332
                mergeObject.setIdInSource(idInSource);
333
            }
334

    
335
            //status and parent
336
            Set<Taxon> taxa = name.getTaxa();
337
            taxa = getReallyAcceptedTaxa(taxa);
338
            if (!taxa.isEmpty()){
339
                mergeObject.setStatus(true);
340
                Iterator<Taxon> taxaIterator = taxa.iterator();
341
                Taxon taxon = null;
342
                while (taxaIterator.hasNext()){
343
                    taxon = taxaIterator.next();
344
                    if (!taxon.isMisapplication()){
345
                        break;
346
                    }
347
                }
348
                @SuppressWarnings("null")
349
                Set<TaxonNode> nodes = taxon.getTaxonNodes();
350
                Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
351
                TaxonNode parentNode = null;
352
                while (taxonNodeIterator.hasNext()){
353
                    TaxonNode node = taxonNodeIterator.next();
354
                    if (!node.isTopmostNode()){
355
                        parentNode = node.getParent();
356
                    }
357
                }
358
                //TODO: ändern mit erweitertem Initializer..
359
                if (parentNode != null){
360
                    TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
361
                    String parentNameCache = parentName.getNameCache();
362
                    mergeObject.setParentString(parentNameCache);
363
                    mergeObject.setParentRankString(parentName.getRank().getLabel());
364
                }
365
            }else{
366
                mergeObject.setStatus(false);
367
                TaxonNode parentNode = getAcceptedNode(name);
368
                //TODO: ändern mit erweitertem Initializer..
369
                if (parentNode != null){
370
                    TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
371
                    String parentNameCache = parentName.getNameCache();
372
                    mergeObject.setParentString(parentNameCache);
373
                    mergeObject.setParentRankString(parentName.getRank().getLabel());
374
                }
375
            }
376
        }
377

    
378

    
379
        //set parent informations
380

    
381
        /*
382
        Set<HybridRelationship> parentRelations = zooName.getParentRelationships();
383
        Iterator parentIterator = parentRelations.iterator();
384
        HybridRelationship parentRel;
385
        ZoologicalName parentName;
386
        while (parentIterator.hasNext()){
387
            parentRel = (HybridRelationship)parentIterator.next();
388
            parentName = (ZoologicalName)parentRel.getParentName();
389
            mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
390
            mergeObject.setParentStringInErms(parentName.getNameCache());
391
        }
392

    
393
        parentRelations = zooName2.getParentRelationships();
394
        parentIterator = parentRelations.iterator();
395

    
396
        while (parentIterator.hasNext()){
397
            parentRel = (HybridRelationship)parentIterator.next();
398
            parentName = (ZoologicalName)parentRel.getParentName();
399
            mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
400
            mergeObject.setParentStringInFaunaEu(parentName.getNameCache());
401
        }*/
402

    
403
        merge.add(mergeMap);
404
    }
405

    
406
    private TaxonNodeDto getPhylum(CdmApplicationController appCtr, TaxonName name) {
407
        TaxonNodeDto phylum = null;
408
        if (name.getRank().equals(Rank.PHYLUM())) {
409
            Taxon taxon = getAcceptedTaxon(name);
410
            if (taxon != null) {
411
                if (taxon.getTaxonNodes().size()>1){
412
                    logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one.");
413
                }
414
                TaxonNode node = taxon.getTaxonNodes().iterator().next();
415
                phylum = new TaxonNodeDto(node);
416
            }
417

    
418
        }
419
        if (phylum == null && !name.getRank().isHigher(Rank.PHYLUM())){
420
            Taxon taxon = getAcceptedTaxon(name);
421
            if (!taxon.getTaxonNodes().isEmpty()){
422
                if (taxon.getTaxonNodes().size()>1){
423
                    logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one.");
424
                }
425
                TaxonNode node = taxon.getTaxonNodes().iterator().next();
426
                phylum = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(node.getClassification(), Rank.PHYLUM(), name);
427
            }
428
        }
429
        return phylum;
430
    }
431

    
432
	private TaxonNode getAcceptedNode(TaxonName ermsName) {
433
	    TaxonNode parentNode = null;
434
		Set<TaxonBase> taxonBases = ermsName.getTaxonBases();
435
		if (!taxonBases.isEmpty()) {
436
		    Taxon taxon = null;
437
			TaxonBase<?> taxonBase = taxonBases.iterator().next();
438
			if (taxonBase instanceof Synonym) {
439
				taxon = ((Synonym)taxonBase).getAcceptedTaxon();
440
			}else{
441
			    taxon = getAccTaxonForTaxonSynonym((Taxon)taxonBase);
442
			}
443
			Set<TaxonNode> nodes = taxon.getTaxonNodes();
444
			if (!nodes.isEmpty()) {
445
			    parentNode = nodes.iterator().next();
446
			}
447
		}
448

    
449
		return parentNode;
450
	}
451

    
452
	private Taxon getAcceptedTaxon(TaxonName name) {
453
		Taxon taxon = null;
454
		//prefer accepted taxon
455
		if (name.getTaxa() != null && !name.getTaxa().isEmpty()){
456
			taxon = name.getTaxa().iterator().next();
457
			taxon = getAccTaxonForTaxonSynonym(taxon);
458
		//else take synonym
459
		}else if (name.getTaxonBases() != null && !name.getTaxonBases().isEmpty()){
460
			TaxonBase<?> taxonBase = name.getTaxonBases().iterator().next();
461
			if (taxonBase instanceof Synonym) {
462
				Synonym syn = (Synonym)taxonBase;
463
				taxon = syn.getAcceptedTaxon();
464
			}
465
		}
466
		return taxon;
467
	}
468

    
469
    private Taxon getAccTaxonForTaxonSynonym(Taxon taxon) {
470
        if (!taxon.getRelationsFromThisTaxon().isEmpty()){
471
            for (TaxonRelationship rel: taxon.getRelationsFromThisTaxon()){
472
                UUID uuidType = rel.getType().getUuid();
473
                if (uuidType.equals(TaxonRelationshipType.uuidSynonymOfTaxonRelationship)
474
                        || uuidType.equals(TaxonRelationshipType.uuidHeterotypicSynonymTaxonRelationship)
475
                        || uuidType.equals(TaxonRelationshipType.uuidHomotypicSynonymTaxonRelationship)){
476
                    taxon = rel.getToTaxon();
477
                }
478
            }
479
        }
480
        return taxon;
481
    }
482

    
483
    /**
484
     * Filters out the ERMS taxon synonyms
485
     */
486
    private Set<Taxon> getReallyAcceptedTaxa(Set<Taxon> taxa) {
487
        Set<Taxon> result = new HashSet<>();
488
        for (Taxon taxon : taxa){
489
            Taxon accTaxon = getAccTaxonForTaxonSynonym(taxon);
490
            if(taxon.equals(accTaxon)) {
491
                result.add(taxon);
492
            }
493
        }
494
        return result;
495
    }
496

    
497
    private CharSequence Nz(String str) {
498
        return CdmUtils.Nz(str);
499
    }
500

    
501
    public static void main(String[] args) {
502
        PesiFindIdenticalNamesActivator activator = new PesiFindIdenticalNamesActivator();
503
        activator.invoke(pesiSource);
504
        System.exit(0);
505
    }
506
}
(2-2/2)