Project

General

Profile

Download (16 KB) Statistics
| Branch: | Revision:
1
package eu.etaxonomy.cdm.app.pesi.merging;
2

    
3
import java.io.FileWriter;
4
import java.io.IOException;
5
import java.util.ArrayList;
6
import java.util.Collections;
7
import java.util.Iterator;
8
import java.util.List;
9
import java.util.Set;
10
import java.util.TreeSet;
11
import java.util.UUID;
12

    
13
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
14
import eu.etaxonomy.cdm.app.common.CdmDestinations;
15
import eu.etaxonomy.cdm.app.pesi.FaunaEuropaeaSources;
16
import eu.etaxonomy.cdm.app.util.TestDatabase;
17
import eu.etaxonomy.cdm.database.DbSchemaValidation;
18
import eu.etaxonomy.cdm.database.ICdmDataSource;
19
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
20
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
21
import eu.etaxonomy.cdm.io.common.Source;
22
import eu.etaxonomy.cdm.io.faunaEuropaea.FaunaEuropaeaImportConfigurator;
23
import eu.etaxonomy.cdm.io.pesi.merging.FaunaEuErmsMerging;
24
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
25
import eu.etaxonomy.cdm.model.common.OriginalSourceBase;
26
import eu.etaxonomy.cdm.model.description.Feature;
27
import eu.etaxonomy.cdm.model.description.FeatureNode;
28
import eu.etaxonomy.cdm.model.description.FeatureTree;
29
import eu.etaxonomy.cdm.model.name.HybridRelationship;
30
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
31
import eu.etaxonomy.cdm.model.name.NonViralName;
32
import eu.etaxonomy.cdm.model.name.Rank;
33
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
34
import eu.etaxonomy.cdm.model.name.TaxonNameComparator;
35
import eu.etaxonomy.cdm.model.name.ZoologicalName;
36
import eu.etaxonomy.cdm.model.taxon.Taxon;
37
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
38
import eu.etaxonomy.cdm.model.taxon.TaxonComparator;
39
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
40
import eu.etaxonomy.cdm.persistence.dao.hibernate.HibernateProxyHelperExtended;
41

    
42
public class FaunaEuErmsFindIdenticalNamesActivator {
43

    
44
	// Data source that main() opens via initDb() and searches for identical names.
	static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.cdm_test_jaxb2();
45
	//static final ICdmDataSource ermsSource = CdmDestinations.cdm_test_andreasM();
46
	
47
	//TODO fetch all TaxonNameBases from both DBs
48
	
49
	
50
	private CdmApplicationController initDb(ICdmDataSource db) {
51

    
52
		// Init source DB
53
		CdmApplicationController appCtrInit = null;
54

    
55
		appCtrInit = TestDatabase.initDb(db, DbSchemaValidation.VALIDATE, false);
56

    
57
		return appCtrInit;
58
	}
59
	
60
	
61
	/**
62
	 * @param args
63
	 */
64
	public static void main(String[] args) {
65
		
66
		FaunaEuErmsFindIdenticalNamesActivator sc = new FaunaEuErmsFindIdenticalNamesActivator();
67
		
68
		CdmApplicationController appCtrFaunaEu = sc.initDb(faunaEuropaeaSource);
69
		String sFileName = "c:\\test";
70
		//CdmApplicationController appCtrErms = sc.initDb(ermsSource);
71
		List<String> propertyPaths = new ArrayList<String>();
72
		propertyPaths.add("sources.*");
73
		propertyPaths.add("sources.idInSource");
74
		propertyPaths.add("sources.idNamespace");
75
		propertyPaths.add("taxonBases.*");
76
		propertyPaths.add("taxonBases.relationsFromThisTaxon");
77
		propertyPaths.add("taxonBases.taxonNodes.*");
78
		propertyPaths.add("taxonBases.taxonNodes.parent.*");
79
		propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*");
80
		System.err.println("Start getIdenticalNames...");
81
		List<TaxonNameBase> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths);
82
		//List<UUID> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths);
83
		
84
		System.err.println("first name: " + namesOfIdenticalTaxa.get(0) + " " + namesOfIdenticalTaxa.size());
85
		TaxonNameBase zooName = (TaxonNameBase)namesOfIdenticalTaxa.get(0);
86
		System.err.println(zooName + " nr of taxa " + namesOfIdenticalTaxa.size());
87
		//TaxonNameComparator taxComp = new TaxonNameComparator();
88
		
89
		//Collections.sort(namesOfIdenticalTaxa,taxComp);
90
		System.err.println(namesOfIdenticalTaxa.get(0) + " - " + namesOfIdenticalTaxa.get(1) + " - " + namesOfIdenticalTaxa.get(2));
91
		List<FaunaEuErmsMerging> mergingObjects = new ArrayList<FaunaEuErmsMerging>();
92
		FaunaEuErmsMerging mergeObject;
93
		TaxonNameBase faunaEuTaxName;
94
		TaxonNameBase ermsTaxName;
95
				
96
		mergingObjects= sc.createMergeObjects(namesOfIdenticalTaxa, appCtrFaunaEu);
97
		
98
		sc.writeSameNamesdifferentAuthorToCsv(mergingObjects, sFileName + "_authors.csv");
99
		sc.writeSameNamesdifferentStatusToCsv(mergingObjects, sFileName + "_status.csv");
100
		sc.writeSameNamesToCsVFile(mergingObjects, sFileName + "_names.csv");
101
		sc.writeSameNamesdifferentPhylumToCsv(mergingObjects, sFileName + "_phylum.csv");
102
		
103
		
104
		System.out.println("End merging Fauna Europaea and Erms");
105
		
106
	}
107
	
108
	private boolean writeSameNamesToCsVFile(
109
			List<FaunaEuErmsMerging> mergingObjects, String string) {
110
	    try{
111
		FileWriter writer = new FileWriter(string);
112
	
113
	    //create Header
114
	    String firstLine = "same names";
115
	    createHeader(writer, firstLine);
116
		for (FaunaEuErmsMerging merging : mergingObjects){
117
	    	writeCsvLine(writer, merging) ;
118
		}
119
		writer.flush();
120
		writer.close();
121
	}
122
	catch(IOException e)
123
	{
124
	 return false;
125
	} 
126
	return true;
127
	}
128

    
129

    
130
	private boolean writeSameNamesdifferentPhylumToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
131
		try
132
		{
133
		    FileWriter writer = new FileWriter(sfileName);
134
		    
135
		    //create Header
136
		   String firstLine = "same names but different phylum";
137
		   createHeader(writer, firstLine);
138
		    
139
			//write data
140
			for (FaunaEuErmsMerging merging : mergingObjects){
141
		    	//TODO
142
				if ((merging.getPhylumInErms()== null )^ (merging.getPhylumInFaunaEu()== null)){
143
					writeCsvLine(writer, merging) ;
144
				}else if(!((merging.getPhylumInErms()==null) && (merging.getPhylumInFaunaEu()==null))){ 
145
					if(!merging.getPhylumInErms().equals(merging.getPhylumInFaunaEu())){
146
						writeCsvLine(writer, merging) ;
147
					}
148
				}
149
			}
150
			writer.flush();
151
			writer.close();
152
		}
153
		catch(IOException e)
154
		{
155
		 return false;
156
		} 
157
		return true;
158
	}
159
	
160
	private boolean writeSameNamesdifferentRankToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
161
		try
162
		{
163
		    FileWriter writer = new FileWriter(sfileName);
164
		    String firstLine = "same names but different rank";
165
		    //create Header
166
		    createHeader(writer, firstLine);
167
			
168
			//write data
169
			for (FaunaEuErmsMerging merging : mergingObjects){
170
		    	
171
				if (!merging.getRankInErms().equals(merging.getRankInFaunaEu())){
172
					writeCsvLine(writer, merging);
173
				}
174
			}
175
			writer.flush();
176
			writer.close();
177
		}
178
		catch(IOException e)
179
		{
180
		 return false;
181
		} 
182
		return true;
183
	}
184
	
185
	private void createHeader(FileWriter writer, String firstLine) throws IOException{
186
		 	writer.append(firstLine);
187
		    writer.append('\n');
188
		    writer.append("uuid in Fauna Europaea");
189
			writer.append(';');
190
			writer.append("id in Fauna Europaea");
191
			writer.append(';');
192
			writer.append("name");
193
			writer.append(';');
194
			writer.append("author");
195
			writer.append(';');
196
			writer.append("rank");
197
			writer.append(';');
198
			writer.append("state");
199
			writer.append(';');
200
			writer.append("phylum");
201
			writer.append(';');
202
			writer.append("parent");
203
			writer.append(';');
204
			writer.append("parent rank");
205
			writer.append(';');
206
			
207
			writer.append("uuid in Erms");
208
			writer.append(';');
209
			writer.append("id in Erms");
210
			writer.append(';');
211
			writer.append("name");
212
			writer.append(';');
213
			writer.append("author");
214
			writer.append(';');
215
			writer.append("rank");
216
			writer.append(';');
217
			writer.append("state");
218
			writer.append(';');
219
			writer.append("phylum");
220
			writer.append(';');
221
			writer.append("parent");
222
			writer.append(';');
223
			writer.append("parent rank");
224
			writer.append('\n');
225
	}
226
	
227
	private boolean writeSameNamesdifferentStatusToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
228
		try
229
		{
230
		    FileWriter writer = new FileWriter(sfileName);
231
		    
232
		    //create Header
233
		    String firstLine = "same names but different status";
234
		    createHeader(writer, firstLine);
235
		    
236
			//write data
237
			for (FaunaEuErmsMerging merging : mergingObjects){
238
		    	
239
				if (merging.isStatInErms()^merging.isStatInFaunaEu()){
240
					 writeCsvLine(writer, merging);
241
				}
242
			}
243
			
244
 
245
			writer.flush();
246
			writer.close();
247
		}
248
		catch(IOException e)
249
		{
250
		 return false;
251
		} 
252
		return true;
253
	}
254
	
255
	private boolean writeSameNamesdifferentAuthorToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
256
		try
257
		{
258
		    FileWriter writer = new FileWriter(sfileName);
259
		    
260
		    //create Header
261
		   String firstLine = "same names but different authors";
262
		   createHeader(writer, firstLine);
263
		    
264
			//write data
265
			for (FaunaEuErmsMerging merging : mergingObjects){
266
		    	
267
				if (!merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){
268
					 writeCsvLine(writer, merging);
269
				}
270
			}
271
			
272
 
273
			writer.flush();
274
			writer.close();
275
		}
276
		catch(IOException e)
277
		{
278
		 return false;
279
		} 
280
		return true;
281
	}
282
	
283
	private void writeCsvLine(FileWriter writer, FaunaEuErmsMerging merging) throws IOException{
284
		
285
		writer.append(merging.getUuidFaunaEu());
286
		writer.append(';');
287
		writer.append(merging.getIdInFaunaEu());
288
		writer.append(';');
289
		writer.append(merging.getNameCacheInFaunaEu());
290
		writer.append(';');
291
		writer.append(merging.getAuthorInFaunaEu());
292
		writer.append(';');
293
		writer.append(merging.getRankInFaunaEu());
294
		writer.append(';');
295
		if (merging.isStatInFaunaEu()){
296
			writer.append("accepted");
297
		}else{
298
			writer.append("synonym");
299
		}
300
		writer.append(';');
301
		writer.append(merging.getPhylumInFaunaEu());
302
		writer.append(';');
303
		writer.append(merging.getParentStringInFaunaEu());
304
		writer.append(';');
305
		writer.append(merging.getParentRankStringInFaunaEu());
306
		writer.append(';');
307
		
308
		writer.append(merging.getUuidErms());
309
		writer.append(';');
310
		writer.append(merging.getIdInErms());
311
		writer.append(';');
312
		writer.append(merging.getNameCacheInErms());
313
		writer.append(';');
314
		writer.append(merging.getAuthorInErms());
315
		writer.append(';');
316
		writer.append(merging.getRankInErms());
317
		writer.append(';');
318
		if (merging.isStatInErms()){
319
			writer.append("accepted");
320
		}else{
321
			writer.append("synonym");
322
		}
323
		
324
		writer.append(';');
325
		writer.append(merging.getPhylumInErms());
326
		writer.append(';');
327
		writer.append(merging.getParentStringInErms());
328
		writer.append(';');
329
		writer.append(merging.getParentRankStringInErms());
330
		writer.append('\n');
331
	}
332
	
333
	
334
	private List<FaunaEuErmsMerging> createMergeObjects(List<TaxonNameBase> names, CdmApplicationController appCtr){
335
		
336
		List<FaunaEuErmsMerging> merge = new ArrayList<FaunaEuErmsMerging>();
337
		ZoologicalName zooName, zooName2;
338
		FaunaEuErmsMerging mergeObject;
339
		String idInSource1;
340
		for (int i = 0; i<names.size()-1; i=i+2){
341
			zooName = (ZoologicalName)names.get(i);
342
			zooName2 = (ZoologicalName)names.get(i+1);
343
			mergeObject = new FaunaEuErmsMerging();
344
			//TODO:?berpr?fen, ob die beiden Namen identisch sind und aus unterschiedlichen DB kommen
345
			
346
			//getPhylum
347
			String phylum1 = null;
348
			if (!zooName.getRank().isHigher(Rank.PHYLUM())){
349
				phylum1 =appCtr.getTaxonService().getPhylumName(zooName);
350
			}
351
			
352
			String phylum2 = null;
353
			if (!zooName2.getRank().isHigher(Rank.PHYLUM())){
354
				phylum2 = appCtr.getTaxonService().getPhylumName(zooName2);
355
			}
356
			mergeObject.setPhylumInErms(phylum1);
357
			mergeObject.setPhylumInFaunaEu(phylum2);
358
			
359
			//getUuids
360
			mergeObject.setUuidErms(zooName.getUuid().toString());
361
			mergeObject.setUuidFaunaEu(zooName.getUuid().toString());
362
			
363
			Iterator sources = zooName.getSources().iterator();
364
			if (sources.hasNext()){
365
				IdentifiableSource source = (IdentifiableSource)sources.next();
366
				idInSource1 = source.getIdInSource();
367
				mergeObject.setIdInErms(idInSource1);
368
			}
369
			sources = zooName2.getSources().iterator();
370
			if (sources.hasNext()){
371
				IdentifiableSource source = (IdentifiableSource)sources.next();
372
				idInSource1 = source.getIdInSource();
373
				mergeObject.setIdInFaunaEu(idInSource1);
374
			}
375
			
376
			mergeObject.setNameCacheInErms(zooName.getNameCache());
377
			mergeObject.setNameCacheInFaunaEu(zooName2.getNameCache());
378
			
379
			mergeObject.setAuthorInErms(zooName.getAuthorshipCache());
380
			mergeObject.setAuthorInFaunaEu(zooName2.getAuthorshipCache());
381
			Set<Taxon> taxa = zooName.getTaxa();
382
			if (!taxa.isEmpty()){
383
				mergeObject.setStatInErms(true);
384
				Iterator taxaIterator = taxa.iterator();
385
				Taxon taxon = null;
386
				while (taxaIterator.hasNext()){
387
					taxon = (Taxon) taxaIterator.next();
388
					if (!taxon.isMisapplication()){
389
						break;
390
					}
391
				}
392
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
393
				Iterator taxonNodeIterator = nodes.iterator();
394
				TaxonNode node, parentNode = null;
395
				while (taxonNodeIterator.hasNext()){
396
					node = (TaxonNode)taxonNodeIterator.next();
397
					if (!node.isTopmostNode()){
398
						parentNode = node.getParent();
399
					}
400
				}
401
				//TODO: ?ndern mit erweitertem Initializer..
402
				if (parentNode != null){
403
					ZoologicalName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName(), ZoologicalName.class);
404
					String parentNameCache = parentName.getNameCache();
405
					mergeObject.setParentStringInErms(parentNameCache);
406
					mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
407
					//System.err.println("parentName: " + parentNameCache);
408
				}
409
			}else{
410
				mergeObject.setStatInErms(false);
411
			}
412
			taxa = zooName2.getTaxa();
413
			if (!taxa.isEmpty()){
414
				mergeObject.setStatInFaunaEu(true);
415
				Iterator taxaIterator = taxa.iterator();
416
				Taxon taxon = null;
417
				while (taxaIterator.hasNext()){
418
					taxon = (Taxon) taxaIterator.next();
419
					if (!taxon.isMisapplication()){
420
						break;
421
					}
422
				}
423
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
424
				Iterator taxonNodeIterator = nodes.iterator();
425
				TaxonNode node, parentNode = null;
426
				while (taxonNodeIterator.hasNext()){
427
					node = (TaxonNode)taxonNodeIterator.next();
428
					if (!node.isTopmostNode()){
429
						parentNode = node.getParent();
430
					}
431
				}
432
				//TODO: ?ndern mit erweitertem Initializer..
433
				if (parentNode != null){
434
					if (parentNode.getTaxon().getName() instanceof ZoologicalName){
435
					
436
					ZoologicalName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName(), ZoologicalName.class);
437
					String parentNameCache = parentName.getNameCache();
438
					mergeObject.setParentStringInFaunaEu(parentNameCache);
439
					mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
440
					System.err.println("parentName: " + parentNameCache);
441
					}else{
442
						System.err.println("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid());
443
					}
444
					
445
				}
446
			}else{
447
				mergeObject.setStatInErms(false);
448
			}
449
			taxa = zooName2.getTaxa();
450
			if (!taxa.isEmpty()){
451
				mergeObject.setStatInFaunaEu(true);
452
			}else{
453
				mergeObject.setStatInFaunaEu(false);
454
				
455
			}
456
			
457
			mergeObject.setRankInErms(zooName.getRank().getLabel());
458
			mergeObject.setRankInFaunaEu(zooName2.getRank().getLabel());
459
			
460
			
461
			
462
			
463
			//set parent informations
464
			
465
			
466
			/*
467
			Set<HybridRelationship> parentRelations = zooName.getParentRelationships();
468
			Iterator parentIterator = parentRelations.iterator();
469
			HybridRelationship parentRel;
470
			ZoologicalName parentName;
471
			while (parentIterator.hasNext()){
472
				parentRel = (HybridRelationship)parentIterator.next();
473
				parentName = (ZoologicalName)parentRel.getParentName();
474
				mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
475
				mergeObject.setParentStringInErms(parentName.getNameCache());
476
			}
477
			
478
			parentRelations = zooName2.getParentRelationships();
479
			parentIterator = parentRelations.iterator();
480
		
481
			while (parentIterator.hasNext()){
482
				parentRel = (HybridRelationship)parentIterator.next();
483
				parentName = (ZoologicalName)parentRel.getParentName();
484
				mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
485
				mergeObject.setParentStringInFaunaEu(parentName.getNameCache());
486
			}*/
487
			merge.add(mergeObject);
488
		}
489
		
490
		return merge;
491
		
492
		
493
	}
494
	
495
}
(1-1/2)