Project

General

Profile

Download (14.8 KB) Statistics
| Branch: | Revision:
1
package eu.etaxonomy.cdm.app.pesi.merging;
2

    
3
import java.io.FileWriter;
4
import java.io.IOException;
5
import java.util.ArrayList;
6
import java.util.Iterator;
7
import java.util.List;
8
import java.util.Set;
9

    
10
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
11
import eu.etaxonomy.cdm.app.common.CdmDestinations;
12
import eu.etaxonomy.cdm.app.common.TestDatabase;
13
import eu.etaxonomy.cdm.database.DbSchemaValidation;
14
import eu.etaxonomy.cdm.database.ICdmDataSource;
15
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
16
import eu.etaxonomy.cdm.io.pesi.merging.FaunaEuErmsMerging;
17
import eu.etaxonomy.cdm.model.common.CdmBase;
18
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
19
import eu.etaxonomy.cdm.model.name.IZoologicalName;
20
import eu.etaxonomy.cdm.model.name.Rank;
21
import eu.etaxonomy.cdm.model.name.TaxonName;
22
import eu.etaxonomy.cdm.model.taxon.Taxon;
23
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
24

    
25
public class FaunaEuErmsFindIdenticalNamesActivator {
26

    
27
	// Data source that main() opens and searches for identical names: the destination returned by CdmDestinations.localH2().
	static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2();
28
	//static final ICdmDataSource ermsSource = CdmDestinations.cdm_test_andreasM();
29

    
30
	//TODO fetch all TaxonNameBases from both databases
31

    
32
	private CdmApplicationController initDb(ICdmDataSource db) {
33

    
34
		// Init source DB
35
		CdmApplicationController appCtrInit = null;
36

    
37
		appCtrInit = TestDatabase.initDb(db, DbSchemaValidation.VALIDATE, false);
38

    
39
		return appCtrInit;
40
	}
41

    
42

    
43
	/**
44
	 * @param args
45
	 */
46
	public static void main(String[] args) {
47

    
48
		FaunaEuErmsFindIdenticalNamesActivator sc = new FaunaEuErmsFindIdenticalNamesActivator();
49

    
50
		CdmApplicationController appCtrFaunaEu = sc.initDb(faunaEuropaeaSource);
51
		String sFileName = "c:\\test";
52
		//CdmApplicationController appCtrErms = sc.initDb(ermsSource);
53
		List<String> propertyPaths = new ArrayList<>();
54
		propertyPaths.add("sources.*");
55
		propertyPaths.add("sources.idInSource");
56
		propertyPaths.add("sources.idNamespace");
57
		propertyPaths.add("taxonBases.*");
58
		propertyPaths.add("taxonBases.relationsFromThisTaxon");
59
		propertyPaths.add("taxonBases.taxonNodes.*");
60
		propertyPaths.add("taxonBases.taxonNodes.parent.*");
61
		propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*");
62
		System.err.println("Start getIdenticalNames...");
63
		List<TaxonName> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths);
64
		//List<UUID> namesOfIdenticalTaxa = appCtrFaunaEu.getTaxonService().findIdenticalTaxonNameIds(propertyPaths);
65

    
66
		System.err.println("first name: " + namesOfIdenticalTaxa.get(0) + " " + namesOfIdenticalTaxa.size());
67
		TaxonName zooName = namesOfIdenticalTaxa.get(0);
68
		System.err.println(zooName + " nr of taxa " + namesOfIdenticalTaxa.size());
69
		//TaxonNameComparator taxComp = new TaxonNameComparator();
70

    
71
		//Collections.sort(namesOfIdenticalTaxa,taxComp);
72
		System.err.println(namesOfIdenticalTaxa.get(0) + " - " + namesOfIdenticalTaxa.get(1) + " - " + namesOfIdenticalTaxa.get(2));
73
		List<FaunaEuErmsMerging> mergingObjects = new ArrayList<>();
74
		FaunaEuErmsMerging mergeObject;
75
		TaxonName faunaEuTaxName;
76
		TaxonName ermsTaxName;
77

    
78
		mergingObjects= sc.createMergeObjects(namesOfIdenticalTaxa, appCtrFaunaEu);
79

    
80
		sc.writeSameNamesdifferentAuthorToCsv(mergingObjects, sFileName + "_authors.csv");
81
		sc.writeSameNamesdifferentStatusToCsv(mergingObjects, sFileName + "_status.csv");
82
		sc.writeSameNamesToCsVFile(mergingObjects, sFileName + "_names.csv");
83
		sc.writeSameNamesdifferentPhylumToCsv(mergingObjects, sFileName + "_phylum.csv");
84

    
85

    
86
		System.out.println("End merging Fauna Europaea and Erms");
87

    
88
	}
89

    
90
	private boolean writeSameNamesToCsVFile(
91
			List<FaunaEuErmsMerging> mergingObjects, String string) {
92
	    try{
93
    		FileWriter writer = new FileWriter(string);
94

    
95
    	    //create Header
96
    	    String firstLine = "same names";
97
    	    createHeader(writer, firstLine);
98
    		for (FaunaEuErmsMerging merging : mergingObjects){
99
    	    	writeCsvLine(writer, merging) ;
100
    		}
101
    		writer.flush();
102
    		writer.close();
103
    	}catch(IOException e){
104
    	    return false;
105
    	}
106
    	return true;
107
	}
108

    
109
	private boolean writeSameNamesdifferentPhylumToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
110
		try
111
		{
112
		    FileWriter writer = new FileWriter(sfileName);
113

    
114
		    //create Header
115
		   String firstLine = "same names but different phylum";
116
		   createHeader(writer, firstLine);
117

    
118
			//write data
119
			for (FaunaEuErmsMerging merging : mergingObjects){
120
		    	//TODO
121
				if ((merging.getPhylumInErms()== null )^ (merging.getPhylumInFaunaEu()== null)){
122
					writeCsvLine(writer, merging) ;
123
				}else if(!((merging.getPhylumInErms()==null) && (merging.getPhylumInFaunaEu()==null))){
124
					if(!merging.getPhylumInErms().equals(merging.getPhylumInFaunaEu())){
125
						writeCsvLine(writer, merging) ;
126
					}
127
				}
128
			}
129
			writer.flush();
130
			writer.close();
131
		}
132
		catch(IOException e)
133
		{
134
		 return false;
135
		}
136
		return true;
137
	}
138

    
139
	private boolean writeSameNamesdifferentRankToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
140
		try
141
		{
142
		    FileWriter writer = new FileWriter(sfileName);
143
		    String firstLine = "same names but different rank";
144
		    //create Header
145
		    createHeader(writer, firstLine);
146

    
147
			//write data
148
			for (FaunaEuErmsMerging merging : mergingObjects){
149

    
150
				if (!merging.getRankInErms().equals(merging.getRankInFaunaEu())){
151
					writeCsvLine(writer, merging);
152
				}
153
			}
154
			writer.flush();
155
			writer.close();
156
		}
157
		catch(IOException e)
158
		{
159
		 return false;
160
		}
161
		return true;
162
	}
163

    
164
	private void createHeader(FileWriter writer, String firstLine) throws IOException{
165
		 	writer.append(firstLine);
166
		    writer.append('\n');
167
		    writer.append("uuid in Fauna Europaea");
168
			writer.append(';');
169
			writer.append("id in Fauna Europaea");
170
			writer.append(';');
171
			writer.append("name");
172
			writer.append(';');
173
			writer.append("author");
174
			writer.append(';');
175
			writer.append("rank");
176
			writer.append(';');
177
			writer.append("state");
178
			writer.append(';');
179
			writer.append("phylum");
180
			writer.append(';');
181
			writer.append("parent");
182
			writer.append(';');
183
			writer.append("parent rank");
184
			writer.append(';');
185

    
186
			writer.append("uuid in Erms");
187
			writer.append(';');
188
			writer.append("id in Erms");
189
			writer.append(';');
190
			writer.append("name");
191
			writer.append(';');
192
			writer.append("author");
193
			writer.append(';');
194
			writer.append("rank");
195
			writer.append(';');
196
			writer.append("state");
197
			writer.append(';');
198
			writer.append("phylum");
199
			writer.append(';');
200
			writer.append("parent");
201
			writer.append(';');
202
			writer.append("parent rank");
203
			writer.append('\n');
204
	}
205

    
206
	private boolean writeSameNamesdifferentStatusToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
207
		try
208
		{
209
		    FileWriter writer = new FileWriter(sfileName);
210

    
211
		    //create Header
212
		    String firstLine = "same names but different status";
213
		    createHeader(writer, firstLine);
214

    
215
			//write data
216
			for (FaunaEuErmsMerging merging : mergingObjects){
217

    
218
				if (merging.isStatInErms()^merging.isStatInFaunaEu()){
219
					 writeCsvLine(writer, merging);
220
				}
221
			}
222

    
223
			writer.flush();
224
			writer.close();
225
		}
226
		catch(IOException e)
227
		{
228
		 return false;
229
		}
230
		return true;
231
	}
232

    
233
	private boolean writeSameNamesdifferentAuthorToCsv(List<FaunaEuErmsMerging> mergingObjects, String sfileName){
234
		try
235
		{
236
		    FileWriter writer = new FileWriter(sfileName);
237

    
238
		    //create Header
239
		   String firstLine = "same names but different authors";
240
		   createHeader(writer, firstLine);
241

    
242
			//write data
243
			for (FaunaEuErmsMerging merging : mergingObjects){
244

    
245
				if (!merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){
246
					 writeCsvLine(writer, merging);
247
				}
248
			}
249

    
250

    
251
			writer.flush();
252
			writer.close();
253
		}
254
		catch(IOException e)
255
		{
256
		 return false;
257
		}
258
		return true;
259
	}
260

    
261
	private void writeCsvLine(FileWriter writer, FaunaEuErmsMerging merging) throws IOException{
262

    
263
		writer.append(merging.getUuidFaunaEu());
264
		writer.append(';');
265
		writer.append(merging.getIdInFaunaEu());
266
		writer.append(';');
267
		writer.append(merging.getNameCacheInFaunaEu());
268
		writer.append(';');
269
		writer.append(merging.getAuthorInFaunaEu());
270
		writer.append(';');
271
		writer.append(merging.getRankInFaunaEu());
272
		writer.append(';');
273
		if (merging.isStatInFaunaEu()){
274
			writer.append("accepted");
275
		}else{
276
			writer.append("synonym");
277
		}
278
		writer.append(';');
279
		writer.append(merging.getPhylumInFaunaEu());
280
		writer.append(';');
281
		writer.append(merging.getParentStringInFaunaEu());
282
		writer.append(';');
283
		writer.append(merging.getParentRankStringInFaunaEu());
284
		writer.append(';');
285

    
286
		writer.append(merging.getUuidErms());
287
		writer.append(';');
288
		writer.append(merging.getIdInErms());
289
		writer.append(';');
290
		writer.append(merging.getNameCacheInErms());
291
		writer.append(';');
292
		writer.append(merging.getAuthorInErms());
293
		writer.append(';');
294
		writer.append(merging.getRankInErms());
295
		writer.append(';');
296
		if (merging.isStatInErms()){
297
			writer.append("accepted");
298
		}else{
299
			writer.append("synonym");
300
		}
301

    
302
		writer.append(';');
303
		writer.append(merging.getPhylumInErms());
304
		writer.append(';');
305
		writer.append(merging.getParentStringInErms());
306
		writer.append(';');
307
		writer.append(merging.getParentRankStringInErms());
308
		writer.append('\n');
309
	}
310

    
311

    
312
	private List<FaunaEuErmsMerging> createMergeObjects(List<TaxonName> names, CdmApplicationController appCtr){
313

    
314
		List<FaunaEuErmsMerging> merge = new ArrayList<>();
315
		TaxonName zooName, zooName2;
316
		FaunaEuErmsMerging mergeObject;
317
		String idInSource1;
318
		for (int i = 0; i<names.size()-1; i=i+2){
319
			zooName = names.get(i);
320
			zooName2 = names.get(i+1);
321
			mergeObject = new FaunaEuErmsMerging();
322
			//TODO:überprüfen, ob die beiden Namen identisch sind und aus unterschiedlichen DB kommen
323

    
324
			//getPhylum
325
			String phylum1 = null;
326
			if (!zooName.getRank().isHigher(Rank.PHYLUM())){
327
				phylum1 =appCtr.getTaxonService().getPhylumName(zooName);
328
			}
329

    
330
			String phylum2 = null;
331
			if (!zooName2.getRank().isHigher(Rank.PHYLUM())){
332
				phylum2 = appCtr.getTaxonService().getPhylumName(zooName2);
333
			}
334
			mergeObject.setPhylumInErms(phylum1);
335
			mergeObject.setPhylumInFaunaEu(phylum2);
336

    
337
			//getUuids
338
			mergeObject.setUuidErms(zooName.getUuid().toString());
339
			mergeObject.setUuidFaunaEu(zooName.getUuid().toString());
340

    
341
			Iterator<IdentifiableSource> sources = zooName.getSources().iterator();
342
			if (sources.hasNext()){
343
				IdentifiableSource source = sources.next();
344
				idInSource1 = source.getIdInSource();
345
				mergeObject.setIdInErms(idInSource1);
346
			}
347
			sources = zooName2.getSources().iterator();
348
			if (sources.hasNext()){
349
				IdentifiableSource source = sources.next();
350
				idInSource1 = source.getIdInSource();
351
				mergeObject.setIdInFaunaEu(idInSource1);
352
			}
353

    
354
			mergeObject.setNameCacheInErms(zooName.getNameCache());
355
			mergeObject.setNameCacheInFaunaEu(zooName2.getNameCache());
356

    
357
			mergeObject.setAuthorInErms(zooName.getAuthorshipCache());
358
			mergeObject.setAuthorInFaunaEu(zooName2.getAuthorshipCache());
359
			Set<Taxon> taxa = zooName.getTaxa();
360
			if (!taxa.isEmpty()){
361
				mergeObject.setStatInErms(true);
362
				Iterator<Taxon> taxaIterator = taxa.iterator();
363
				Taxon taxon = null;
364
				while (taxaIterator.hasNext()){
365
					taxon = taxaIterator.next();
366
					if (!taxon.isMisapplication()){
367
						break;
368
					}
369
				}
370
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
371
				Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
372
				TaxonNode node, parentNode = null;
373
				while (taxonNodeIterator.hasNext()){
374
					node = taxonNodeIterator.next();
375
					if (!node.isTopmostNode()){
376
						parentNode = node.getParent();
377
					}
378
				}
379
				//TODO: ändern mit erweitertem Initializer..
380
				if (parentNode != null){
381
				    TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName());
382
					String parentNameCache = parentName.getNameCache();
383
					mergeObject.setParentStringInErms(parentNameCache);
384
					mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
385
					//System.err.println("parentName: " + parentNameCache);
386
				}
387
			}else{
388
				mergeObject.setStatInErms(false);
389
			}
390
			taxa = zooName2.getTaxa();
391
			if (!taxa.isEmpty()){
392
				mergeObject.setStatInFaunaEu(true);
393
				Iterator<Taxon> taxaIterator = taxa.iterator();
394
				Taxon taxon = null;
395
				while (taxaIterator.hasNext()){
396
					taxon = taxaIterator.next();
397
					if (!taxon.isMisapplication()){
398
						break;
399
					}
400
				}
401
				Set<TaxonNode> nodes = taxon.getTaxonNodes();
402
				Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
403
				TaxonNode node, parentNode = null;
404
				while (taxonNodeIterator.hasNext()){
405
					node = taxonNodeIterator.next();
406
					if (!node.isTopmostNode()){
407
						parentNode = node.getParent();
408
					}
409
				}
410
				//TODO: ändern mit erweitertem Initializer..
411
				if (parentNode != null){
412
					if (parentNode.getTaxon().getName().isZoological()){
413

    
414
    					IZoologicalName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
415
    					String parentNameCache = parentName.getNameCache();
416
    					mergeObject.setParentStringInFaunaEu(parentNameCache);
417
    					mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
418
    					System.err.println("parentName: " + parentNameCache);
419
					}else{
420
						System.err.println("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid());
421
					}
422

    
423
				}
424
			}else{
425
				mergeObject.setStatInErms(false);
426
			}
427
			taxa = zooName2.getTaxa();
428
			if (!taxa.isEmpty()){
429
				mergeObject.setStatInFaunaEu(true);
430
			}else{
431
				mergeObject.setStatInFaunaEu(false);
432

    
433
			}
434

    
435
			mergeObject.setRankInErms(zooName.getRank().getLabel());
436
			mergeObject.setRankInFaunaEu(zooName2.getRank().getLabel());
437

    
438
			//set parent informations
439

    
440

    
441
			/*
442
			Set<HybridRelationship> parentRelations = zooName.getParentRelationships();
443
			Iterator parentIterator = parentRelations.iterator();
444
			HybridRelationship parentRel;
445
			ZoologicalName parentName;
446
			while (parentIterator.hasNext()){
447
				parentRel = (HybridRelationship)parentIterator.next();
448
				parentName = (ZoologicalName)parentRel.getParentName();
449
				mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
450
				mergeObject.setParentStringInErms(parentName.getNameCache());
451
			}
452

    
453
			parentRelations = zooName2.getParentRelationships();
454
			parentIterator = parentRelations.iterator();
455

    
456
			while (parentIterator.hasNext()){
457
				parentRel = (HybridRelationship)parentIterator.next();
458
				parentName = (ZoologicalName)parentRel.getParentName();
459
				mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
460
				mergeObject.setParentStringInFaunaEu(parentName.getNameCache());
461
			}*/
462
			merge.add(mergeObject);
463
		}
464

    
465
		return merge;
466

    
467
	}
468
}
(1-1/2)