Project

General

Profile

Download (17.3 KB) Statistics
| Branch: | Revision:
1
package eu.etaxonomy.cdm.app.pesi.merging;
2

    
3
import java.io.FileWriter;
4
import java.io.IOException;
5
import java.util.ArrayList;
6
import java.util.HashSet;
7
import java.util.Iterator;
8
import java.util.List;
9
import java.util.Map;
10
import java.util.Set;
11
import java.util.UUID;
12

    
13
import com.sun.media.jfxmedia.logging.Logger;
14

    
15
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
16
import eu.etaxonomy.cdm.app.common.CdmDestinations;
17
import eu.etaxonomy.cdm.app.util.TestDatabase;
18
import eu.etaxonomy.cdm.database.DbSchemaValidation;
19
import eu.etaxonomy.cdm.database.ICdmDataSource;
20
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
21
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
22
import eu.etaxonomy.cdm.io.pesi.merging.FaunaEuErmsMerging;
23
import eu.etaxonomy.cdm.model.common.CdmBase;
24
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
25
import eu.etaxonomy.cdm.model.name.IZoologicalName;
26
import eu.etaxonomy.cdm.model.name.Rank;
27
import eu.etaxonomy.cdm.model.name.TaxonName;
28
import eu.etaxonomy.cdm.model.reference.Reference;
29
import eu.etaxonomy.cdm.model.taxon.Classification;
30
import eu.etaxonomy.cdm.model.taxon.Taxon;
31
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
32
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
33
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
34

    
35
public class FaunaEuErmsFindIdenticalNamesActivator {
36

    
37
	//static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2();
38
	// Data source that main() passes to initDb(); it is the only database this activator opens.
	static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.cdm_test_local_faunaEu_mysql();
39
	// Reference loaded by UUID in main(). createMergeObjects() treats the name whose taxon base
	// has this reference as sec as the Fauna Europaea name.
	static Reference faunaSec;
40
	// Reference loaded by UUID in main() and passed together with faunaSec to
	// findIdenticalTaxonNameIds(); presumably the sec reference of the ERMS taxa - confirm.
	static Reference ermsSec;
41

    
42
	//TODO fetch all TaxonNameBases from both databases
43

    
44
	private CdmApplicationController initDb(ICdmDataSource db) {
45

    
46
		// Init source DB
47
		//CdmApplicationController appCtrInit = null;
48
		CdmApplicationController appCtrInit = CdmIoApplicationController.NewInstance(db, DbSchemaValidation.VALIDATE, false);
49

    
50
		
51
		//appCtrInit = TestDatabase.initDb(db, DbSchemaValidation.VALIDATE, false);
52

    
53
		return appCtrInit;
54
	}
55

    
56

    
57
	/**
58
	 * @param args
59
	 */
60
	public static void main(String[] args) {
61

    
62
		FaunaEuErmsFindIdenticalNamesActivator sc = new FaunaEuErmsFindIdenticalNamesActivator();
63

    
64
		CdmApplicationController appCtrFaunaEu = sc.initDb(faunaEuropaeaSource);
65
		String sFileName = "C:\\Users\\k.luther\\test";
66
		//CdmApplicationController appCtrErms = sc.initDb(ermsSource);
67
		List<String> propertyPaths = new ArrayList<>();
68
		propertyPaths.add("sources.*");
69
		propertyPaths.add("sources.idInSource");
70
		propertyPaths.add("sources.idNamespace");
71
		propertyPaths.add("taxonBases.*");
72
		propertyPaths.add("taxonBases.relationsFromThisTaxon");
73
		propertyPaths.add("taxonBases.taxonNodes.*");
74
		propertyPaths.add("taxonBases.taxonNodes.parent.*");
75
		propertyPaths.add("taxonBases.taxonNodes.childNodes.*");
76
		propertyPaths.add("taxonBases.taxonNodes.childNodes.classification.rootNode.childNodes.*");
77
		propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*");
78
		propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.*");
79
		propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.*");
80
		propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.classification.rootNode.childNodes.*");
81
		System.err.println("Start getIdenticalNames...");
82
		
83
		faunaSec = appCtrFaunaEu.getReferenceService().load(UUID.fromString("6786d863-75d4-4796-b916-c1c3dff4cb70"));
84
		ermsSec = appCtrFaunaEu.getReferenceService().load(UUID.fromString("7744bc26-f914-42c4-b54a-dd2a030a8bb7"));
85
		Map<String, List<TaxonName>> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(ermsSec, faunaSec, propertyPaths);
86
		//List<UUID> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths);
87

    
88
		System.err.println("first name: " + namesOfIdenticalTaxa.get(0) + " " + namesOfIdenticalTaxa.size());
89
		//TaxonName zooName = namesOfIdenticalTaxa.get(0);
90
		//System.err.println(zooName + " nr of taxa " + namesOfIdenticalTaxa.size());
91
		//TaxonNameComparator taxComp = new TaxonNameComparator();
92

    
93
		//Collections.sort(namesOfIdenticalTaxa,taxComp);
94
		System.err.println(namesOfIdenticalTaxa.get(0) + " - " + namesOfIdenticalTaxa.get(1) + " - " + namesOfIdenticalTaxa.get(2));
95
		List<FaunaEuErmsMerging> mergingObjects = new ArrayList<>();
96
		FaunaEuErmsMerging mergeObject;
97
		TaxonName faunaEuTaxName;
98
		TaxonName ermsTaxName;
99

    
100
		mergingObjects= sc.createMergeObjects(namesOfIdenticalTaxa, appCtrFaunaEu);
101

    
102
		sc.writeSameNamesdifferentAuthorToCsv(mergingObjects, sFileName + "_authors.csv");
103
		sc.writeSameNamesdifferentStatusToCsv(mergingObjects, sFileName + "_status.csv");
104
		sc.writeSameNamesToCsVFile(mergingObjects, sFileName + "_names.csv");
105
		sc.writeSameNamesdifferentPhylumToCsv(mergingObjects, sFileName + "_phylum.csv");
106

    
107

    
108
		System.out.println("End merging Fauna Europaea and Erms");
109

    
110
	}
111

    
112
	private boolean writeSameNamesToCsVFile(
113
			List<FaunaEuErmsMerging> mergingObjects, String string) {
114
	    try{
115
    		FileWriter writer = new FileWriter(string);
116

    
117
    	    //create Header
118
    	    String firstLine = "same names";
119
    	    createHeader(writer, firstLine);
120
    		for (FaunaEuErmsMerging merging : mergingObjects){
121
    	    	writeCsvLine(writer, merging) ;
122
    		}
123
    		writer.flush();
124
    		writer.close();
125
    	}catch(IOException e){
126
    	    return false;
127
    	}
128
    	return true;
129
	}
130

    
131
	private boolean writeSameNamesdifferentPhylumToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
132
		try
133
		{
134
		    FileWriter writer = new FileWriter(sfileName);
135

    
136
		    //create Header
137
		   String firstLine = "same names but different phylum";
138
		   createHeader(writer, firstLine);
139

    
140
			//write data
141
			for (FaunaEuErmsMerging merging : mergingObjects){
142
		    	//TODO
143
				if ((merging.getPhylumInErms()== null )^ (merging.getPhylumInFaunaEu()== null)){
144
					writeCsvLine(writer, merging) ;
145
				}else if(!((merging.getPhylumInErms()==null) && (merging.getPhylumInFaunaEu()==null))){
146
					if(!merging.getPhylumInErms().equals(merging.getPhylumInFaunaEu())){
147
						writeCsvLine(writer, merging) ;
148
					}
149
				}
150
			}
151
			writer.flush();
152
			writer.close();
153
		}
154
		catch(IOException e)
155
		{
156
		 return false;
157
		}
158
		return true;
159
	}
160

    
161
	private boolean writeSameNamesdifferentRankToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
162
		try
163
		{
164
		    FileWriter writer = new FileWriter(sfileName);
165
		    String firstLine = "same names but different rank";
166
		    //create Header
167
		    createHeader(writer, firstLine);
168

    
169
			//write data
170
			for (FaunaEuErmsMerging merging : mergingObjects){
171

    
172
				if (!merging.getRankInErms().equals(merging.getRankInFaunaEu())){
173
					writeCsvLine(writer, merging);
174
				}
175
			}
176
			writer.flush();
177
			writer.close();
178
		}
179
		catch(IOException e)
180
		{
181
		 return false;
182
		}
183
		return true;
184
	}
185

    
186
	private void createHeader(FileWriter writer, String firstLine) throws IOException{
187
		 	writer.append(firstLine);
188
		    writer.append('\n');
189
		    writer.append("uuid in Fauna Europaea");
190
			writer.append(';');
191
			writer.append("id in Fauna Europaea");
192
			writer.append(';');
193
			writer.append("name");
194
			writer.append(';');
195
			writer.append("author");
196
			writer.append(';');
197
			writer.append("rank");
198
			writer.append(';');
199
			writer.append("state");
200
			writer.append(';');
201
			writer.append("phylum");
202
			writer.append(';');
203
			writer.append("parent");
204
			writer.append(';');
205
			writer.append("parent rank");
206
			writer.append(';');
207

    
208
			writer.append("uuid in Erms");
209
			writer.append(';');
210
			writer.append("id in Erms");
211
			writer.append(';');
212
			writer.append("name");
213
			writer.append(';');
214
			writer.append("author");
215
			writer.append(';');
216
			writer.append("rank");
217
			writer.append(';');
218
			writer.append("state");
219
			writer.append(';');
220
			writer.append("phylum");
221
			writer.append(';');
222
			writer.append("parent");
223
			writer.append(';');
224
			writer.append("parent rank");
225
			writer.append('\n');
226
	}
227

    
228
	private boolean writeSameNamesdifferentStatusToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
229
		try
230
		{
231
		    FileWriter writer = new FileWriter(sfileName);
232

    
233
		    //create Header
234
		    String firstLine = "same names but different status";
235
		    createHeader(writer, firstLine);
236

    
237
			//write data
238
			for (FaunaEuErmsMerging merging : mergingObjects){
239

    
240
				if (merging.isStatInErms()^merging.isStatInFaunaEu()){
241
					 writeCsvLine(writer, merging);
242
				}
243
			}
244

    
245
			writer.flush();
246
			writer.close();
247
		}
248
		catch(IOException e)
249
		{
250
		 return false;
251
		}
252
		return true;
253
	}
254

    
255
	private boolean writeSameNamesdifferentAuthorToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
256
		try
257
		{
258
		    FileWriter writer = new FileWriter(sfileName);
259

    
260
		    //create Header
261
		   String firstLine = "same names but different authors";
262
		   createHeader(writer, firstLine);
263

    
264
			//write data
265
			for (FaunaEuErmsMerging merging : mergingObjects){
266

    
267
				if (!merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){
268
					 writeCsvLine(writer, merging);
269
				}
270
			}
271

    
272

    
273
			writer.flush();
274
			writer.close();
275
		}
276
		catch(IOException e)
277
		{
278
		 return false;
279
		}
280
		return true;
281
	}
282

    
283
	private void writeCsvLine(FileWriter writer, FaunaEuErmsMerging merging) throws IOException{
284

    
285
		writer.append(merging.getUuidFaunaEu());
286
		writer.append(';');
287
		writer.append(merging.getIdInFaunaEu());
288
		writer.append(';');
289
		writer.append(merging.getNameCacheInFaunaEu());
290
		writer.append(';');
291
		writer.append(merging.getAuthorInFaunaEu());
292
		writer.append(';');
293
		writer.append(merging.getRankInFaunaEu());
294
		writer.append(';');
295
		if (merging.isStatInFaunaEu()){
296
			writer.append("accepted");
297
		}else{
298
			writer.append("synonym");
299
		}
300
		writer.append(';');
301
		writer.append(merging.getPhylumInFaunaEu().getTaxonTitleCache());
302
		writer.append(';');
303
		writer.append(merging.getParentStringInFaunaEu());
304
		writer.append(';');
305
		writer.append(merging.getParentRankStringInFaunaEu());
306
		writer.append(';');
307

    
308
		writer.append(merging.getUuidErms());
309
		writer.append(';');
310
		writer.append(merging.getIdInErms());
311
		writer.append(';');
312
		writer.append(merging.getNameCacheInErms());
313
		writer.append(';');
314
		writer.append(merging.getAuthorInErms());
315
		writer.append(';');
316
		writer.append(merging.getRankInErms());
317
		writer.append(';');
318
		if (merging.isStatInErms()){
319
			writer.append("accepted");
320
		}else{
321
			writer.append("synonym");
322
		}
323

    
324
		writer.append(';');
325
		writer.append(merging.getPhylumInErms().getTaxonTitleCache());
326
		writer.append(';');
327
		writer.append(merging.getParentStringInErms());
328
		writer.append(';');
329
		writer.append(merging.getParentRankStringInErms());
330
		writer.append('\n');
331
	}
332

    
333

    
334
	private List<FaunaEuErmsMerging> createMergeObjects(Map<String,List<TaxonName>> names, CdmApplicationController appCtr){
335

    
336
		List<FaunaEuErmsMerging> merge = new ArrayList<>();
337
		TaxonName zooName, zooName2;
338
		FaunaEuErmsMerging mergeObject;
339
		String idInSource1;
340
		List<TaxonName> identicalNames;
341
		for (String nameCache: names.keySet()){
342
			identicalNames = names.get(nameCache);
343
			
344
			mergeObject = new FaunaEuErmsMerging();
345
			//TODO:überprüfen, ob die beiden Namen identisch sind und aus unterschiedlichen DB kommen
346
			Classification faunaEuClassification = appCtr.getClassificationService().load(UUID.fromString("44d8605e-a7ce-41e1-bee9-99edfec01e7c"));
347
			Classification ermsClassification = appCtr.getClassificationService().load(UUID.fromString("6fa988a9-10b7-48b0-a370-2586fbc066eb"));
348
			//getPhylum
349
			TaxonNodeDto phylum1 = null;
350
			TaxonName faunaEuName = null;
351
			TaxonName ermsName = null;
352
			TaxonBase tempName = null;
353
			if (identicalNames.size() == 2) {
354
				Set<TaxonBase> taxonBases = identicalNames.get(0).getTaxonBases();
355
				if (taxonBases.size()==1) {
356
					Iterator<TaxonBase> it = taxonBases.iterator();
357
					tempName = it.next();
358
					if (tempName.getSec().equals(faunaSec)) {
359
						faunaEuName = identicalNames.get(0);
360
						ermsName = identicalNames.get(1);
361
					}else {
362
						faunaEuName = identicalNames.get(1);
363
						ermsName = identicalNames.get(0);
364
					}
365
				}else {
366
					//TODO: find the two correct names
367
				}
368
			}else {
369
				System.err.println(nameCache + " has more than two identical namecaches");
370
				return null;
371
			}
372
			phylum1 = null;
373
			if (faunaEuName != null && !faunaEuName.getRank().isHigher(Rank.PHYLUM())){
374
					phylum1 =appCtr.getTaxonNodeService().taxonNodeDtoParentRank(faunaEuClassification, Rank.PHYLUM(), faunaEuName);
375
			}
376

    
377
			TaxonNodeDto phylum2 = null;
378
			if (ermsName != null && !ermsName.getRank().isHigher(Rank.PHYLUM())){
379
				phylum2 = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(ermsClassification, Rank.PHYLUM(), ermsName);
380
			}
381
			mergeObject.setPhylumInErms(phylum1);
382
			mergeObject.setPhylumInFaunaEu(phylum2);
383

    
384
			//getUuids
385
			mergeObject.setUuidErms(ermsName.getUuid().toString());
386
			mergeObject.setUuidFaunaEu(faunaEuName.getUuid().toString());
387

    
388
			Iterator<IdentifiableSource> sources = ermsName.getSources().iterator();
389
			if (sources.hasNext()){
390
				IdentifiableSource source = sources.next();
391
				idInSource1 = source.getIdInSource();
392
				mergeObject.setIdInErms(idInSource1);
393
			}
394
			sources = faunaEuName.getSources().iterator();
395
			if (sources.hasNext()){
396
				IdentifiableSource source = sources.next();
397
				idInSource1 = source.getIdInSource();
398
				mergeObject.setIdInFaunaEu(idInSource1);
399
			}
400

    
401
			mergeObject.setNameCacheInErms(ermsName.getNameCache());
402
			mergeObject.setNameCacheInFaunaEu(faunaEuName.getNameCache());
403

    
404
			mergeObject.setAuthorInErms(ermsName.getAuthorshipCache());
405
			mergeObject.setAuthorInFaunaEu(faunaEuName.getAuthorshipCache());
406
			Set<Taxon> taxa = ermsName.getTaxa();
407
			if (!taxa.isEmpty()){
408
				mergeObject.setStatInErms(true);
409
				Iterator<Taxon> taxaIterator = taxa.iterator();
410
				Taxon taxon = null;
411
				while (taxaIterator.hasNext()){
412
					taxon = taxaIterator.next();
413
					if (!taxon.isMisapplication()){
414
						break;
415
					}
416
				}
417
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
418
				Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
419
				TaxonNode node, parentNode = null;
420
				while (taxonNodeIterator.hasNext()){
421
					node = taxonNodeIterator.next();
422
					if (!node.isTopmostNode()){
423
						parentNode = node.getParent();
424
					}
425
				}
426
				//TODO: ändern mit erweitertem Initializer..
427
				if (parentNode != null){
428
				    TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName());
429
					String parentNameCache = parentName.getNameCache();
430
					mergeObject.setParentStringInErms(parentNameCache);
431
					mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
432
					//System.err.println("parentName: " + parentNameCache);
433
				}
434
			}else{
435
				mergeObject.setStatInErms(false);
436
			}
437
			taxa = faunaEuName.getTaxa();
438
			if (!taxa.isEmpty()){
439
				mergeObject.setStatInFaunaEu(true);
440
				Iterator<Taxon> taxaIterator = taxa.iterator();
441
				Taxon taxon = null;
442
				while (taxaIterator.hasNext()){
443
					taxon = taxaIterator.next();
444
					if (!taxon.isMisapplication()){
445
						break;
446
					}
447
				}
448
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
449
				Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
450
				TaxonNode node, parentNode = null;
451
				while (taxonNodeIterator.hasNext()){
452
					node = taxonNodeIterator.next();
453
					if (!node.isTopmostNode()){
454
						parentNode = node.getParent();
455
					}
456
				}
457
				//TODO: ändern mit erweitertem Initializer..
458
				if (parentNode != null){
459
					if (parentNode.getTaxon().getName().isZoological()){
460

    
461
    					IZoologicalName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
462
    					String parentNameCache = parentName.getNameCache();
463
    					mergeObject.setParentStringInFaunaEu(parentNameCache);
464
    					mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
465
    					System.err.println("parentName: " + parentNameCache);
466
					}else{
467
						System.err.println("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid());
468
					}
469

    
470
				}
471
			}else{
472
				mergeObject.setStatInErms(false);
473
			}
474
			taxa = faunaEuName.getTaxa();
475
			if (!taxa.isEmpty()){
476
				mergeObject.setStatInFaunaEu(true);
477
			}else{
478
				mergeObject.setStatInFaunaEu(false);
479

    
480
			}
481

    
482
			mergeObject.setRankInErms(ermsName.getRank().getLabel());
483
			mergeObject.setRankInFaunaEu(faunaEuName.getRank().getLabel());
484

    
485
			//set parent informations
486

    
487

    
488
			/*
489
			Set<HybridRelationship> parentRelations = zooName.getParentRelationships();
490
			Iterator parentIterator = parentRelations.iterator();
491
			HybridRelationship parentRel;
492
			ZoologicalName parentName;
493
			while (parentIterator.hasNext()){
494
				parentRel = (HybridRelationship)parentIterator.next();
495
				parentName = (ZoologicalName)parentRel.getParentName();
496
				mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
497
				mergeObject.setParentStringInErms(parentName.getNameCache());
498
			}
499

    
500
			parentRelations = zooName2.getParentRelationships();
501
			parentIterator = parentRelations.iterator();
502

    
503
			while (parentIterator.hasNext()){
504
				parentRel = (HybridRelationship)parentIterator.next();
505
				parentName = (ZoologicalName)parentRel.getParentName();
506
				mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
507
				mergeObject.setParentStringInFaunaEu(parentName.getNameCache());
508
			}*/
509
			merge.add(mergeObject);
510
		}
511
//		}
512

    
513
		return merge;
514

    
515
	}
516
}
(1-1/2)