Revision b5b0fcf5
Added by Andreas Müller over 4 years ago
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/FaunaEuErmsFindIdenticalNamesActivator.java | ||
---|---|---|
1 |
package eu.etaxonomy.cdm.app.pesi.merging; |
|
2 |
|
|
3 |
import java.io.FileWriter; |
|
4 |
import java.io.IOException; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.Iterator; |
|
7 |
import java.util.List; |
|
8 |
import java.util.Map; |
|
9 |
import java.util.Set; |
|
10 |
import java.util.UUID; |
|
11 |
|
|
12 |
import org.apache.log4j.Logger; |
|
13 |
|
|
14 |
import eu.etaxonomy.cdm.api.application.CdmApplicationController; |
|
15 |
import eu.etaxonomy.cdm.app.common.CdmDestinations; |
|
16 |
import eu.etaxonomy.cdm.database.DbSchemaValidation; |
|
17 |
import eu.etaxonomy.cdm.database.ICdmDataSource; |
|
18 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
|
19 |
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController; |
|
20 |
import eu.etaxonomy.cdm.io.pesi.merging.PesiMergeObject; |
|
21 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
|
22 |
import eu.etaxonomy.cdm.model.common.IdentifiableSource; |
|
23 |
import eu.etaxonomy.cdm.model.name.IZoologicalName; |
|
24 |
import eu.etaxonomy.cdm.model.name.Rank; |
|
25 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
|
26 |
import eu.etaxonomy.cdm.model.reference.Reference; |
|
27 |
import eu.etaxonomy.cdm.model.taxon.Classification; |
|
28 |
import eu.etaxonomy.cdm.model.taxon.Synonym; |
|
29 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
30 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
31 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode; |
|
32 |
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto; |
|
33 |
|
|
34 |
public class FaunaEuErmsFindIdenticalNamesActivator { |
|
35 |
|
|
36 |
private static final Logger logger = Logger.getLogger(FaunaEuErmsFindIdenticalNamesActivator.class); |
|
37 |
|
|
38 |
//static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2(); |
|
39 |
static final ICdmDataSource pesiSource = CdmDestinations.cdm_test_local_faunaEu_mysql(); |
|
40 |
|
|
41 |
static final String path = "C:\\Users\\k.luther\\test"; |
|
42 |
|
|
43 |
private static UUID faunaEuSecUuid = UUID.fromString("6786d863-75d4-4796-b916-c1c3dff4cb70"); |
|
44 |
private static UUID ermsSecUuid = UUID.fromString("7744bc26-f914-42c4-b54a-dd2a030a8bb7"); |
|
45 |
private static UUID ifSecUuid; |
|
46 |
private static UUID emSecUuid; |
|
47 |
|
|
48 |
private void invoke(ICdmDataSource source){ |
|
49 |
|
|
50 |
CdmApplicationController app = CdmIoApplicationController.NewInstance(source, DbSchemaValidation.VALIDATE, false); |
|
51 |
|
|
52 |
List<String> propertyPaths = new ArrayList<>(); |
|
53 |
propertyPaths.add("sources.*"); |
|
54 |
propertyPaths.add("sources.idInSource"); |
|
55 |
propertyPaths.add("sources.idNamespace"); |
|
56 |
propertyPaths.add("taxonBases.*"); |
|
57 |
propertyPaths.add("taxonBases.relationsFromThisTaxon"); |
|
58 |
propertyPaths.add("taxonBases.taxonNodes.*"); |
|
59 |
propertyPaths.add("taxonBases.taxonNodes.parent.*"); |
|
60 |
propertyPaths.add("taxonBases.taxonNodes.childNodes.*"); |
|
61 |
propertyPaths.add("taxonBases.taxonNodes.childNodes.classification.rootNode.childNodes.*"); |
|
62 |
propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*"); |
|
63 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.*"); |
|
64 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.*"); |
|
65 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.classification.rootNode.childNodes.*"); |
|
66 |
System.out.println("Start getIdenticalNames..."); |
|
67 |
|
|
68 |
Reference faunaEuSec = app.getReferenceService().load(faunaEuSecUuid); |
|
69 |
Reference ermsSec = app.getReferenceService().load(ermsSecUuid); |
|
70 |
Map<String, List<TaxonName>> namesOfIdenticalTaxa = app.getTaxonService().findIdenticalTaxonNameIds(ermsSec, faunaEuSec, propertyPaths); |
|
71 |
|
|
72 |
System.out.println("Start creating merging objects"); |
|
73 |
List<PesiMergeObject> mergingObjects= createMergeObjects(namesOfIdenticalTaxa, app); |
|
74 |
boolean resultOK = true; |
|
75 |
System.out.println("Start creating csv files"); |
|
76 |
resultOK &= writeSameNamesDifferentAuthorToCsv(mergingObjects, path + "_authors.csv"); |
|
77 |
resultOK &= writeSameNamesDifferentStatusToCsv(mergingObjects, path + "_status.csv"); |
|
78 |
resultOK &= writeSameNamesToCsvFile(mergingObjects, path + "_names.csv"); |
|
79 |
resultOK &= writeSameNamesDifferentPhylumToCsv(mergingObjects, path + "_phylum.csv"); |
|
80 |
resultOK &= writeSameNamesDifferentParentToCsv(mergingObjects, path + "parent.csv"); |
|
81 |
|
|
82 |
System.out.println("End merging Fauna Europaea and Erms: " + resultOK); |
|
83 |
} |
|
84 |
|
|
85 |
private boolean writeSameNamesToCsvFile( |
|
86 |
List<PesiMergeObject> mergingObjects, String string) { |
|
87 |
|
|
88 |
try{ |
|
89 |
FileWriter writer = new FileWriter(string); |
|
90 |
|
|
91 |
//create Header |
|
92 |
String firstLine = "same names"; |
|
93 |
createHeader(writer, firstLine); |
|
94 |
for (PesiMergeObject merging : mergingObjects){ |
|
95 |
writeCsvLine(writer, merging) ; |
|
96 |
} |
|
97 |
writer.flush(); |
|
98 |
writer.close(); |
|
99 |
return true; |
|
100 |
}catch(IOException e){ |
|
101 |
logger.error(e.getMessage()); |
|
102 |
return false; |
|
103 |
} |
|
104 |
} |
|
105 |
|
|
106 |
private boolean writeSameNamesDifferentPhylumToCsv( |
|
107 |
List<PesiMergeObject> mergingObjects, String sfileName){ |
|
108 |
|
|
109 |
try{ |
|
110 |
FileWriter writer = new FileWriter(sfileName); |
|
111 |
|
|
112 |
//create Header |
|
113 |
String firstLine = "same names but different phylum"; |
|
114 |
createHeader(writer, firstLine); |
|
115 |
|
|
116 |
//write data |
|
117 |
for (PesiMergeObject merging : mergingObjects){ |
|
118 |
//TODO |
|
119 |
if ((merging.getPhylumInErms()== null )^ (merging.getPhylumInFaunaEu()== null)){ |
|
120 |
writeCsvLine(writer, merging) ; |
|
121 |
}else if(!((merging.getPhylumInErms()==null) && (merging.getPhylumInFaunaEu()==null))){ |
|
122 |
if(!merging.getPhylumInErms().equals(merging.getPhylumInFaunaEu())){ |
|
123 |
writeCsvLine(writer, merging) ; |
|
124 |
} |
|
125 |
} |
|
126 |
} |
|
127 |
writer.flush(); |
|
128 |
writer.close(); |
|
129 |
return true; |
|
130 |
}catch(IOException e){ |
|
131 |
logger.error(e.getMessage()); |
|
132 |
return false; |
|
133 |
} |
|
134 |
} |
|
135 |
|
|
136 |
private boolean writeSameNamesDifferentParentToCsv( |
|
137 |
List<PesiMergeObject> mergingObjects, String sfileName){ |
|
138 |
|
|
139 |
try{ |
|
140 |
FileWriter writer = new FileWriter(sfileName); |
|
141 |
|
|
142 |
//create Header |
|
143 |
String firstLine = "same names but different parent"; |
|
144 |
createHeader(writer, firstLine); |
|
145 |
|
|
146 |
//write data |
|
147 |
for (PesiMergeObject merging : mergingObjects){ |
|
148 |
//TODO |
|
149 |
if ((merging.getParentStringInErms()== null )^ (merging.getParentStringInFaunaEu()== null)){ |
|
150 |
writeCsvLine(writer, merging) ; |
|
151 |
}else if(!((merging.getParentStringInErms()==null) && (merging.getParentStringInFaunaEu()==null))){ |
|
152 |
if(!merging.getParentStringInErms().equals(merging.getParentStringInFaunaEu())){ |
|
153 |
writeCsvLine(writer, merging) ; |
|
154 |
} |
|
155 |
} |
|
156 |
} |
|
157 |
writer.flush(); |
|
158 |
writer.close(); |
|
159 |
return true; |
|
160 |
}catch(IOException e){ |
|
161 |
return false; |
|
162 |
} |
|
163 |
} |
|
164 |
|
|
165 |
private boolean writeSameNamesdifferentRankToCsv( |
|
166 |
List<PesiMergeObject> mergingObjects, String sfileName){ |
|
167 |
|
|
168 |
try{ |
|
169 |
FileWriter writer = new FileWriter(sfileName); |
|
170 |
String firstLine = "same names but different rank"; |
|
171 |
//create Header |
|
172 |
createHeader(writer, firstLine); |
|
173 |
|
|
174 |
//write data |
|
175 |
for (PesiMergeObject merging : mergingObjects){ |
|
176 |
|
|
177 |
if (!merging.getRankInErms().equals(merging.getRankInFaunaEu())){ |
|
178 |
writeCsvLine(writer, merging); |
|
179 |
} |
|
180 |
} |
|
181 |
writer.flush(); |
|
182 |
writer.close(); |
|
183 |
return true; |
|
184 |
}catch(IOException e){ |
|
185 |
return false; |
|
186 |
} |
|
187 |
} |
|
188 |
|
|
189 |
private void createHeader(FileWriter writer, String firstLine) throws IOException{ |
|
190 |
writer.append(firstLine); |
|
191 |
writer.append('\n'); |
|
192 |
writer.append("uuid in Fauna Europaea"); |
|
193 |
writer.append(';'); |
|
194 |
writer.append("id in Fauna Europaea"); |
|
195 |
writer.append(';'); |
|
196 |
writer.append("name"); |
|
197 |
writer.append(';'); |
|
198 |
writer.append("author"); |
|
199 |
writer.append(';'); |
|
200 |
writer.append("rank"); |
|
201 |
writer.append(';'); |
|
202 |
writer.append("state"); |
|
203 |
writer.append(';'); |
|
204 |
writer.append("phylum"); |
|
205 |
writer.append(';'); |
|
206 |
writer.append("parent"); |
|
207 |
writer.append(';'); |
|
208 |
writer.append("parent rank"); |
|
209 |
writer.append(';'); |
|
210 |
|
|
211 |
writer.append("uuid in Erms"); |
|
212 |
writer.append(';'); |
|
213 |
writer.append("id in Erms"); |
|
214 |
writer.append(';'); |
|
215 |
writer.append("name"); |
|
216 |
writer.append(';'); |
|
217 |
writer.append("author"); |
|
218 |
writer.append(';'); |
|
219 |
writer.append("rank"); |
|
220 |
writer.append(';'); |
|
221 |
writer.append("state"); |
|
222 |
writer.append(';'); |
|
223 |
writer.append("phylum"); |
|
224 |
writer.append(';'); |
|
225 |
writer.append("parent"); |
|
226 |
writer.append(';'); |
|
227 |
writer.append("parent rank"); |
|
228 |
writer.append('\n'); |
|
229 |
} |
|
230 |
|
|
231 |
private boolean writeSameNamesDifferentStatusToCsv( |
|
232 |
List<PesiMergeObject> mergingObjects, String sfileName){ |
|
233 |
|
|
234 |
try{ |
|
235 |
FileWriter writer = new FileWriter(sfileName); |
|
236 |
|
|
237 |
//create Header |
|
238 |
String firstLine = "same names but different status"; |
|
239 |
createHeader(writer, firstLine); |
|
240 |
|
|
241 |
//write data |
|
242 |
for (PesiMergeObject merging : mergingObjects){ |
|
243 |
if (merging.isStatInErms()^merging.isStatInFaunaEu()){ |
|
244 |
writeCsvLine(writer, merging); |
|
245 |
} |
|
246 |
} |
|
247 |
|
|
248 |
writer.flush(); |
|
249 |
writer.close(); |
|
250 |
return true; |
|
251 |
}catch(IOException e){ |
|
252 |
return false; |
|
253 |
} |
|
254 |
} |
|
255 |
|
|
256 |
private boolean writeSameNamesDifferentAuthorToCsv( |
|
257 |
List<PesiMergeObject> mergingObjects, String sfileName){ |
|
258 |
|
|
259 |
try{ |
|
260 |
FileWriter writer = new FileWriter(sfileName); |
|
261 |
|
|
262 |
//create Header |
|
263 |
String firstLine = "same names but different authors"; |
|
264 |
createHeader(writer, firstLine); |
|
265 |
|
|
266 |
//write data |
|
267 |
for (PesiMergeObject merging : mergingObjects){ |
|
268 |
|
|
269 |
if (merging.getAuthorInErms() != null && merging.getAuthorInFaunaEu() != null && !merging.getAuthorInErms().equals(merging.getAuthorInFaunaEu())){ |
|
270 |
writeCsvLine(writer, merging); |
|
271 |
}else if ((merging.getAuthorInErms() == null && merging.getAuthorInFaunaEu() != null) || (merging.getAuthorInErms() != null && merging.getAuthorInFaunaEu() == null)) { |
|
272 |
writeCsvLine(writer, merging); |
|
273 |
} |
|
274 |
} |
|
275 |
|
|
276 |
writer.flush(); |
|
277 |
writer.close(); |
|
278 |
return true; |
|
279 |
}catch(IOException e){ |
|
280 |
return false; |
|
281 |
} |
|
282 |
} |
|
283 |
|
|
284 |
private void writeCsvLine(FileWriter writer, PesiMergeObject merging) throws IOException{ |
|
285 |
|
|
286 |
writer.append(merging.getUuidFaunaEu()); |
|
287 |
writer.append(';'); |
|
288 |
writer.append(merging.getIdInFaunaEu()); |
|
289 |
writer.append(';'); |
|
290 |
writer.append(merging.getNameCacheInFaunaEu()); |
|
291 |
writer.append(';'); |
|
292 |
writer.append(merging.getAuthorInFaunaEu()); |
|
293 |
writer.append(';'); |
|
294 |
writer.append(merging.getRankInFaunaEu()); |
|
295 |
writer.append(';'); |
|
296 |
if (merging.isStatInFaunaEu()){ |
|
297 |
writer.append("accepted"); |
|
298 |
}else{ |
|
299 |
writer.append("synonym"); |
|
300 |
} |
|
301 |
writer.append(';'); |
|
302 |
writer.append(merging.getPhylumInFaunaEu() != null? merging.getPhylumInFaunaEu().getTaxonTitleCache(): ""); |
|
303 |
writer.append(';'); |
|
304 |
writer.append(merging.getParentStringInFaunaEu()); |
|
305 |
writer.append(';'); |
|
306 |
writer.append(merging.getParentRankStringInFaunaEu()); |
|
307 |
writer.append(';'); |
|
308 |
|
|
309 |
writer.append(merging.getUuidErms()); |
|
310 |
writer.append(';'); |
|
311 |
writer.append(merging.getIdInErms()); |
|
312 |
writer.append(';'); |
|
313 |
writer.append(merging.getNameCacheInErms()); |
|
314 |
writer.append(';'); |
|
315 |
writer.append(merging.getAuthorInErms()); |
|
316 |
writer.append(';'); |
|
317 |
writer.append(merging.getRankInErms()); |
|
318 |
writer.append(';'); |
|
319 |
if (merging.isStatInErms()){ |
|
320 |
writer.append("accepted"); |
|
321 |
}else{ |
|
322 |
writer.append("synonym"); |
|
323 |
} |
|
324 |
|
|
325 |
writer.append(';'); |
|
326 |
writer.append(merging.getPhylumInErms() != null? merging.getPhylumInErms().getTaxonTitleCache():""); |
|
327 |
writer.append(';'); |
|
328 |
writer.append(merging.getParentStringInErms()); |
|
329 |
writer.append(';'); |
|
330 |
writer.append(merging.getParentRankStringInErms()); |
|
331 |
writer.append('\n'); |
|
332 |
} |
|
333 |
|
|
334 |
private List<PesiMergeObject> createMergeObjects(Map<String,List<TaxonName>> names, |
|
335 |
CdmApplicationController appCtr){ |
|
336 |
|
|
337 |
UUID uuidClassification1 = UUID.fromString("44d8605e-a7ce-41e1-bee9-99edfec01e7c"); |
|
338 |
UUID uuidClassification2 = UUID.fromString("6fa988a9-10b7-48b0-a370-2586fbc066eb"); |
|
339 |
Classification classification1 = appCtr.getClassificationService().load(uuidClassification1); |
|
340 |
Classification classification2 = appCtr.getClassificationService().load(uuidClassification2); |
|
341 |
|
|
342 |
List<PesiMergeObject> merge = new ArrayList<>(); |
|
343 |
|
|
344 |
for (String nameCache: names.keySet()){ |
|
345 |
createSingleMergeObject(appCtr, merge, nameCache, names.get(nameCache),classification1, classification2); |
|
346 |
} |
|
347 |
|
|
348 |
return merge; |
|
349 |
} |
|
350 |
|
|
351 |
|
|
352 |
private void createSingleMergeObject(CdmApplicationController appCtr, List<PesiMergeObject> merge, String nameCache, |
|
353 |
List<TaxonName> identicalNames, |
|
354 |
Classification classification1, Classification classification2) { |
|
355 |
|
|
356 |
PesiMergeObject mergeObject = new PesiMergeObject(); |
|
357 |
|
|
358 |
if(identicalNames.size()!= 2) { |
|
359 |
logger.warn(nameCache + " has more than 2 names with identical name cache from different sources. This is not yet handled."); |
|
360 |
return; |
|
361 |
} |
|
362 |
//getPhylum |
|
363 |
TaxonNodeDto phylum1 = null; |
|
364 |
TaxonName faunaEuName = null; |
|
365 |
TaxonName ermsName = null; |
|
366 |
|
|
367 |
Set<TaxonBase> taxonBases = identicalNames.get(0).getTaxonBases(); |
|
368 |
if (taxonBases.size()==1) { |
|
369 |
Iterator<TaxonBase> it = taxonBases.iterator(); |
|
370 |
TaxonBase<?> tempName = it.next(); |
|
371 |
if (tempName.getSec().getUuid().equals(faunaEuSecUuid)) { |
|
372 |
faunaEuName = identicalNames.get(0); |
|
373 |
ermsName = identicalNames.get(1); |
|
374 |
}else { |
|
375 |
faunaEuName = identicalNames.get(1); |
|
376 |
ermsName = identicalNames.get(0); |
|
377 |
} |
|
378 |
}else { |
|
379 |
//TODO: find the two correct names |
|
380 |
logger.warn("Name has not exact 1 but " + taxonBases.size() + " taxon base attached. This is not yet handled"); |
|
381 |
return; |
|
382 |
} |
|
383 |
if (faunaEuName.getRank().equals(Rank.PHYLUM())) { |
|
384 |
Taxon taxon = null; |
|
385 |
taxon = getAcceptedTaxon(faunaEuName); |
|
386 |
if (taxon != null) { |
|
387 |
phylum1 = new TaxonNodeDto(taxon.getTaxonNode(classification1)); |
|
388 |
} |
|
389 |
|
|
390 |
} |
|
391 |
if (phylum1 == null && !faunaEuName.getRank().isHigher(Rank.PHYLUM())){ |
|
392 |
phylum1 = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(classification1, Rank.PHYLUM(), faunaEuName); |
|
393 |
} |
|
394 |
|
|
395 |
TaxonNodeDto phylum2 = null; |
|
396 |
if (ermsName.getRank().equals(Rank.PHYLUM())) { |
|
397 |
Taxon taxon = null; |
|
398 |
taxon = getAcceptedTaxon(ermsName); |
|
399 |
if (taxon != null) { |
|
400 |
phylum2 = new TaxonNodeDto(taxon.getTaxonNode(classification2)); |
|
401 |
} |
|
402 |
|
|
403 |
} |
|
404 |
if (phylum2 == null && !ermsName.getRank().isHigher(Rank.PHYLUM())){ |
|
405 |
phylum2 = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(classification2, Rank.PHYLUM(), ermsName); |
|
406 |
} |
|
407 |
mergeObject.setPhylumInErms(phylum1); |
|
408 |
mergeObject.setPhylumInFaunaEu(phylum2); |
|
409 |
|
|
410 |
//getUuids |
|
411 |
mergeObject.setUuidErms(ermsName.getUuid().toString()); |
|
412 |
mergeObject.setUuidFaunaEu(faunaEuName.getUuid().toString()); |
|
413 |
|
|
414 |
Iterator<IdentifiableSource> sources = ermsName.getSources().iterator(); |
|
415 |
if (sources.hasNext()){ |
|
416 |
IdentifiableSource source = sources.next(); |
|
417 |
String idInSource1 = source.getIdInSource(); |
|
418 |
mergeObject.setIdInErms(idInSource1); |
|
419 |
} |
|
420 |
sources = faunaEuName.getSources().iterator(); |
|
421 |
if (sources.hasNext()){ |
|
422 |
IdentifiableSource source = sources.next(); |
|
423 |
String idInSource1 = source.getIdInSource(); |
|
424 |
mergeObject.setIdInFaunaEu(idInSource1); |
|
425 |
} |
|
426 |
|
|
427 |
mergeObject.setNameCacheInErms(ermsName.getNameCache()); |
|
428 |
mergeObject.setNameCacheInFaunaEu(faunaEuName.getNameCache()); |
|
429 |
|
|
430 |
mergeObject.setAuthorInErms(ermsName.getAuthorshipCache()); |
|
431 |
mergeObject.setAuthorInFaunaEu(faunaEuName.getAuthorshipCache()); |
|
432 |
Set<Taxon> taxa = ermsName.getTaxa(); |
|
433 |
if (!taxa.isEmpty()){ |
|
434 |
mergeObject.setStatInErms(true); |
|
435 |
Iterator<Taxon> taxaIterator = taxa.iterator(); |
|
436 |
Taxon taxon = null; |
|
437 |
while (taxaIterator.hasNext()){ |
|
438 |
taxon = taxaIterator.next(); |
|
439 |
if (!taxon.isMisapplication()){ |
|
440 |
break; |
|
441 |
} |
|
442 |
} |
|
443 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
444 |
Iterator<TaxonNode> taxonNodeIterator = nodes.iterator(); |
|
445 |
TaxonNode node, parentNode = null; |
|
446 |
while (taxonNodeIterator.hasNext()){ |
|
447 |
node = taxonNodeIterator.next(); |
|
448 |
if (!node.isTopmostNode()){ |
|
449 |
parentNode = node.getParent(); |
|
450 |
} |
|
451 |
} |
|
452 |
//TODO: ändern mit erweitertem Initializer.. |
|
453 |
if (parentNode != null){ |
|
454 |
TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName()); |
|
455 |
String parentNameCache = parentName.getNameCache(); |
|
456 |
mergeObject.setParentStringInErms(parentNameCache); |
|
457 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
|
458 |
} |
|
459 |
}else{ |
|
460 |
mergeObject.setStatInErms(false); |
|
461 |
TaxonNode parentNode = getAcceptedNode(ermsName); |
|
462 |
//TODO: ändern mit erweitertem Initializer.. |
|
463 |
if (parentNode != null){ |
|
464 |
TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName()); |
|
465 |
String parentNameCache = parentName.getNameCache(); |
|
466 |
mergeObject.setParentStringInErms(parentNameCache); |
|
467 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
|
468 |
} |
|
469 |
} |
|
470 |
taxa = faunaEuName.getTaxa(); |
|
471 |
if (!taxa.isEmpty()){ |
|
472 |
mergeObject.setStatInFaunaEu(true); |
|
473 |
Iterator<Taxon> taxaIterator = taxa.iterator(); |
|
474 |
Taxon taxon = null; |
|
475 |
while (taxaIterator.hasNext()){ |
|
476 |
taxon = taxaIterator.next(); |
|
477 |
if (!taxon.isMisapplication()){ |
|
478 |
break; |
|
479 |
} |
|
480 |
} |
|
481 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
482 |
Iterator<TaxonNode> taxonNodeIterator = nodes.iterator(); |
|
483 |
TaxonNode node, parentNode = null; |
|
484 |
while (taxonNodeIterator.hasNext()){ |
|
485 |
node = taxonNodeIterator.next(); |
|
486 |
if (!node.isTopmostNode()){ |
|
487 |
parentNode = node.getParent(); |
|
488 |
} |
|
489 |
} |
|
490 |
//TODO: ändern mit erweitertem Initializer.. |
|
491 |
if (parentNode != null){ |
|
492 |
if (parentNode.getTaxon().getName().isZoological()){ |
|
493 |
|
|
494 |
IZoologicalName parentName = CdmBase.deproxy(parentNode.getTaxon().getName()); |
|
495 |
String parentNameCache = parentName.getNameCache(); |
|
496 |
mergeObject.setParentStringInFaunaEu(parentNameCache); |
|
497 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
|
498 |
|
|
499 |
}else{ |
|
500 |
logger.debug("no zoologicalName: " + parentNode.getTaxon().getName().getTitleCache() +" . "+parentNode.getTaxon().getName().getUuid()); |
|
501 |
} |
|
502 |
} |
|
503 |
}else{ |
|
504 |
mergeObject.setStatInFaunaEu(false); |
|
505 |
TaxonNode parentNode = getAcceptedNode(faunaEuName); |
|
506 |
//TODO: ändern mit erweitertem Initializer.. |
|
507 |
if (parentNode != null){ |
|
508 |
TaxonName parentName = HibernateProxyHelper.deproxy(parentNode.getTaxon().getName()); |
|
509 |
String parentNameCache = parentName.getNameCache(); |
|
510 |
mergeObject.setParentStringInFaunaEu(parentNameCache); |
|
511 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
|
512 |
|
|
513 |
} |
|
514 |
} |
|
515 |
|
|
516 |
mergeObject.setRankInErms(ermsName.getRank().getLabel()); |
|
517 |
mergeObject.setRankInFaunaEu(faunaEuName.getRank().getLabel()); |
|
518 |
|
|
519 |
//set parent informations |
|
520 |
|
|
521 |
/* |
|
522 |
Set<HybridRelationship> parentRelations = zooName.getParentRelationships(); |
|
523 |
Iterator parentIterator = parentRelations.iterator(); |
|
524 |
HybridRelationship parentRel; |
|
525 |
ZoologicalName parentName; |
|
526 |
while (parentIterator.hasNext()){ |
|
527 |
parentRel = (HybridRelationship)parentIterator.next(); |
|
528 |
parentName = (ZoologicalName)parentRel.getParentName(); |
|
529 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
|
530 |
mergeObject.setParentStringInErms(parentName.getNameCache()); |
|
531 |
} |
|
532 |
|
|
533 |
parentRelations = zooName2.getParentRelationships(); |
|
534 |
parentIterator = parentRelations.iterator(); |
|
535 |
|
|
536 |
while (parentIterator.hasNext()){ |
|
537 |
parentRel = (HybridRelationship)parentIterator.next(); |
|
538 |
parentName = (ZoologicalName)parentRel.getParentName(); |
|
539 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
|
540 |
mergeObject.setParentStringInFaunaEu(parentName.getNameCache()); |
|
541 |
}*/ |
|
542 |
merge.add(mergeObject); |
|
543 |
} |
|
544 |
|
|
545 |
private TaxonNode getAcceptedNode(TaxonName ermsName) { |
|
546 |
Set<TaxonBase> taxonBases = ermsName.getTaxonBases(); |
|
547 |
Taxon taxon = null; |
|
548 |
if (taxonBases != null && !taxonBases.isEmpty()) { |
|
549 |
TaxonBase<?> taxonBase = taxonBases.iterator().next(); |
|
550 |
if (taxonBase instanceof Synonym) { |
|
551 |
taxon = ((Synonym)taxonBase).getAcceptedTaxon(); |
|
552 |
} |
|
553 |
} |
|
554 |
|
|
555 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
556 |
|
|
557 |
TaxonNode parentNode = null; |
|
558 |
if (nodes != null && !nodes.isEmpty()) { |
|
559 |
parentNode = nodes.iterator().next(); |
|
560 |
} |
|
561 |
return parentNode; |
|
562 |
} |
|
563 |
|
|
564 |
private Taxon getAcceptedTaxon(TaxonName ermsName) { |
|
565 |
Taxon taxon = null; |
|
566 |
if (ermsName.getTaxa() != null && !ermsName.getTaxa().isEmpty()){ |
|
567 |
taxon = ermsName.getTaxa().iterator().next(); |
|
568 |
|
|
569 |
}else if (ermsName.getTaxonBases() != null && !ermsName.getTaxonBases().isEmpty()){ |
|
570 |
TaxonBase<?> taxonBase = ermsName.getTaxonBases().iterator().next(); |
|
571 |
if (taxonBase instanceof Synonym) { |
|
572 |
Synonym syn = (Synonym)taxonBase; |
|
573 |
taxon = syn.getAcceptedTaxon(); |
|
574 |
} |
|
575 |
} |
|
576 |
return taxon; |
|
577 |
} |
|
578 |
|
|
579 |
public static void main(String[] args) { |
|
580 |
FaunaEuErmsFindIdenticalNamesActivator activator = new FaunaEuErmsFindIdenticalNamesActivator(); |
|
581 |
activator.invoke(pesiSource); |
|
582 |
System.exit(0); |
|
583 |
} |
|
584 |
} |
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiFindIdenticalNamesActivator.java | ||
---|---|---|
1 |
package eu.etaxonomy.cdm.app.pesi.merging; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.FileWriter; |
|
5 |
import java.io.IOException; |
|
6 |
import java.lang.reflect.InvocationTargetException; |
|
7 |
import java.lang.reflect.Method; |
|
8 |
import java.util.ArrayList; |
|
9 |
import java.util.Arrays; |
|
10 |
import java.util.HashMap; |
|
11 |
import java.util.Iterator; |
|
12 |
import java.util.List; |
|
13 |
import java.util.Map; |
|
14 |
import java.util.Set; |
|
15 |
import java.util.UUID; |
|
16 |
|
|
17 |
import org.apache.log4j.Logger; |
|
18 |
|
|
19 |
import eu.etaxonomy.cdm.api.application.CdmApplicationController; |
|
20 |
import eu.etaxonomy.cdm.app.common.CdmDestinations; |
|
21 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
22 |
import eu.etaxonomy.cdm.common.StringComparator; |
|
23 |
import eu.etaxonomy.cdm.database.DbSchemaValidation; |
|
24 |
import eu.etaxonomy.cdm.database.ICdmDataSource; |
|
25 |
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController; |
|
26 |
import eu.etaxonomy.cdm.io.pesi.merging.PesiMergeObject; |
|
27 |
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer; |
|
28 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
|
29 |
import eu.etaxonomy.cdm.model.common.IdentifiableSource; |
|
30 |
import eu.etaxonomy.cdm.model.name.Rank; |
|
31 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
|
32 |
import eu.etaxonomy.cdm.model.taxon.Synonym; |
|
33 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
34 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
35 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode; |
|
36 |
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto; |
|
37 |
|
|
38 |
public class PesiFindIdenticalNamesActivator { |
|
39 |
|
|
40 |
private static final Logger logger = Logger.getLogger(PesiFindIdenticalNamesActivator.class); |
|
41 |
|
|
42 |
//static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2(); |
|
43 |
// static final ICdmDataSource pesiSource = CdmDestinations.cdm_test_local_faunaEu_mysql(); |
|
44 |
static final ICdmDataSource pesiSource = CdmDestinations.cdm_pesi2019_final(); |
|
45 |
|
|
46 |
static final String path = System.getProperty("user.home")+File.separator+".cdmLibrary"+File.separator+"pesi"+File.separator+"pesimerge"; |
|
47 |
|
|
48 |
private static UUID faunaEuSourceUuid = PesiTransformer.uuidSourceRefFaunaEuropaea; |
|
49 |
private static UUID ermsSourceUuid = PesiTransformer.uuidSourceRefErms; |
|
50 |
private static UUID ifSourceUuid = PesiTransformer.uuidSourceRefIndexFungorum; |
|
51 |
private static UUID emSourceUuid = PesiTransformer.uuidSourceRefEuroMed; |
|
52 |
private static List<UUID> sourceRefUuids = new ArrayList<>(); |
|
53 |
private static Map<UUID,String> sources = new HashMap<>(); |
|
54 |
|
|
55 |
// registers the four source references to compare together with their short labels
static {
    sourceRefUuids.add(emSourceUuid);
    sourceRefUuids.add(ermsSourceUuid);
    sourceRefUuids.add(faunaEuSourceUuid);
    sourceRefUuids.add(ifSourceUuid);

    sources.put(emSourceUuid, "E+M");
    sources.put(ermsSourceUuid, "ERMS");
    sources.put(faunaEuSourceUuid, "FauEu");
    sources.put(ifSourceUuid, "IF");
}
|
62 |
|
|
63 |
|
|
64 |
private void invoke(ICdmDataSource source){ |
|
65 |
|
|
66 |
CdmApplicationController app = CdmIoApplicationController.NewInstance(source, DbSchemaValidation.VALIDATE, false); |
|
67 |
|
|
68 |
List<String> propertyPaths = new ArrayList<>(); |
|
69 |
propertyPaths.add("sources.*"); |
|
70 |
propertyPaths.add("sources.idInSource"); |
|
71 |
propertyPaths.add("sources.idNamespace"); |
|
72 |
propertyPaths.add("taxonBases.*"); |
|
73 |
propertyPaths.add("taxonBases.relationsFromThisTaxon"); |
|
74 |
propertyPaths.add("taxonBases.taxonNodes.*"); |
|
75 |
propertyPaths.add("taxonBases.taxonNodes.parent.*"); |
|
76 |
propertyPaths.add("taxonBases.taxonNodes.childNodes.*"); |
|
77 |
propertyPaths.add("taxonBases.taxonNodes.childNodes.classification.rootNode.childNodes.*"); |
|
78 |
propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*"); |
|
79 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.*"); |
|
80 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.*"); |
|
81 |
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.classification.rootNode.childNodes.*"); |
|
82 |
System.out.println("Start getIdenticalNames..."); |
|
83 |
|
|
84 |
Map<String, Map<UUID, Set<TaxonName>>> namesOfIdenticalTaxa; |
|
85 |
try { |
|
86 |
namesOfIdenticalTaxa = app.getTaxonService().findIdenticalTaxonNames(sourceRefUuids, propertyPaths); |
|
87 |
} catch (Exception e) { |
|
88 |
e.printStackTrace(); |
|
89 |
return; |
|
90 |
} |
|
91 |
System.out.println("Start creating merging objects"); |
|
92 |
List<Map<UUID, PesiMergeObject>> mergingObjects = createMergeObjects(namesOfIdenticalTaxa, app); |
|
93 |
boolean resultOK = true; |
|
94 |
System.out.println("Start creating csv files"); |
|
95 |
resultOK &= writeSameNamesDifferentAuthorToCsv(mergingObjects, sources, path + "_authors.csv"); |
|
96 |
resultOK &= writeSameNamesDifferentStatusToCsv(mergingObjects, sources, path + "_status.csv"); |
|
97 |
resultOK &= writeSameNamesToCsvFile(mergingObjects, sources, path + "_names.csv"); |
|
98 |
resultOK &= writeSameNamesDifferentPhylumToCsv(mergingObjects, sources, path + "_phylum.csv"); |
|
99 |
resultOK &= writeSameNamesDifferentParentToCsv(mergingObjects, sources, path + "parent.csv"); |
|
100 |
resultOK &= writeSameNamesDifferentRankToCsv(mergingObjects, sources, path + "_rank.csv"); |
|
101 |
|
|
102 |
System.out.println("End find identical names for PESI: " + resultOK + ". Results written to " + path); |
|
103 |
} |
|
104 |
|
|
105 |
private boolean writeSameNamesToCsvFile( |
|
106 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName) { |
|
107 |
|
|
108 |
String header = "same names (all)"; |
|
109 |
String methodName = null; |
|
110 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
111 |
} |
|
112 |
|
|
113 |
private boolean writeSameNamesDifferentPhylumToCsv( |
|
114 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){ |
|
115 |
|
|
116 |
String header = "same names but different phylum"; |
|
117 |
String methodName = "getPhylum"; |
|
118 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
119 |
} |
|
120 |
|
|
121 |
private boolean writeSameNamesDifferentParentToCsv( |
|
122 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){ |
|
123 |
|
|
124 |
String header = "same names but different parent"; |
|
125 |
String methodName = "getParentString"; |
|
126 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
127 |
} |
|
128 |
|
|
129 |
private boolean writeSameNamesDifferentRankToCsv( |
|
130 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){ |
|
131 |
|
|
132 |
String header = "same names but different rank"; |
|
133 |
String methodName = "getRank"; |
|
134 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
135 |
} |
|
136 |
|
|
137 |
private boolean writeSameNamesDifferentStatusToCsv( |
|
138 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){ |
|
139 |
|
|
140 |
String header = "same names but different status"; |
|
141 |
String methodName = "isStatus"; |
|
142 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
143 |
} |
|
144 |
|
|
145 |
private boolean writeSameNamesDifferentAuthorToCsv( |
|
146 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName){ |
|
147 |
|
|
148 |
String header = "same names but different author"; |
|
149 |
String methodName = "getAuthor"; |
|
150 |
return writeDifference(header, methodName, mergingObjects, sources, sFileName); |
|
151 |
} |
|
152 |
|
|
153 |
private boolean writeDifference(String header, String methodName, |
|
154 |
List<Map<UUID, PesiMergeObject>> mergingObjects, Map<UUID,String> sources, String sFileName) { |
|
155 |
|
|
156 |
try{ |
|
157 |
Method method = methodName == null? null : PesiMergeObject.class.getMethod(methodName); |
|
158 |
|
|
159 |
FileWriter writer = new FileWriter(sFileName); |
|
160 |
|
|
161 |
//create Header |
|
162 |
createHeader(writer, header); |
|
163 |
|
|
164 |
//write data |
|
165 |
for (Map<UUID, PesiMergeObject> merging : mergingObjects){ |
|
166 |
if (isDifferent(merging, method)){ |
|
167 |
writeCsvLine(writer, merging, sources) ; |
|
168 |
} |
|
169 |
} |
|
170 |
writer.flush(); |
|
171 |
writer.close(); |
|
172 |
return true; |
|
173 |
}catch(IOException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e){ |
|
174 |
logger.error(e.getMessage()); |
|
175 |
return false; |
|
176 |
} |
|
177 |
} |
|
178 |
|
|
179 |
private boolean isDifferent(Map<UUID, PesiMergeObject> merging, Method method) throws IllegalAccessException, IllegalArgumentException, InvocationTargetException { |
|
180 |
|
|
181 |
if (method == null){ |
|
182 |
return true; |
|
183 |
} |
|
184 |
Object value = null; |
|
185 |
boolean isFirst = true; |
|
186 |
for (UUID sourceUuid: merging.keySet()){ |
|
187 |
if (isFirst){ |
|
188 |
value = method.invoke(merging.get(sourceUuid)); |
|
189 |
isFirst = false; |
|
190 |
}else{ |
|
191 |
Object newValue = method.invoke(merging.get(sourceUuid)); |
|
192 |
if (!CdmUtils.nullSafeEqual(newValue, value)){ |
|
193 |
return true; |
|
194 |
} |
|
195 |
} |
|
196 |
} |
|
197 |
return false; |
|
198 |
} |
|
199 |
|
|
200 |
private void createHeader(FileWriter writer, String firstLine) throws IOException{ |
|
201 |
writer.append(firstLine); |
|
202 |
writer.append('\n'); |
|
203 |
|
|
204 |
for (int i=1; i<=2; i++){ |
|
205 |
writer.append("source"+i); |
|
206 |
writer.append(';'); |
|
207 |
writer.append("name uuid"+i); |
|
208 |
writer.append(';'); |
|
209 |
writer.append("name id"+i); |
|
210 |
writer.append(';'); |
|
211 |
writer.append("name"+i); |
|
212 |
writer.append(';'); |
|
213 |
writer.append("author"+i); |
|
214 |
writer.append(';'); |
|
215 |
writer.append("rank"+i); |
|
216 |
writer.append(';'); |
|
217 |
writer.append("state"+i); |
|
218 |
writer.append(';'); |
|
219 |
writer.append("phylum"+i); |
|
220 |
writer.append(';'); |
|
221 |
writer.append("parent"+i); |
|
222 |
writer.append(';'); |
|
223 |
writer.append("parent rank"+i); |
|
224 |
writer.append(';'); |
|
225 |
} |
|
226 |
writer.append('\n'); |
|
227 |
} |
|
228 |
|
|
229 |
private void writeCsvLine(FileWriter writer, Map<UUID,PesiMergeObject> mergeObjects, Map<UUID,String> sources) throws IOException{ |
|
230 |
|
|
231 |
for (UUID uuid : sourceRefUuids){ |
|
232 |
PesiMergeObject merging = mergeObjects.get(uuid); |
|
233 |
if(merging == null){ |
|
234 |
continue; |
|
235 |
} |
|
236 |
writer.append(sources.get(uuid)).append(";"); |
|
237 |
writer.append(merging.getUuidName()).append(";"); |
|
238 |
writer.append(merging.getIdInSource()).append(";"); |
|
239 |
writer.append(merging.getNameCache()).append(";"); |
|
240 |
writer.append(merging.getAuthor()).append(";"); |
|
241 |
writer.append(merging.getRank()).append(";"); |
|
242 |
if (merging.isStatus()){ |
|
243 |
writer.append("accepted").append(";");; |
|
244 |
}else{ |
|
245 |
writer.append("synonym").append(";");; |
|
246 |
} |
|
247 |
writer.append(merging.getPhylum() != null? merging.getPhylum().getTitleCache(): "").append(";"); |
|
248 |
writer.append(merging.getParentString()).append(";"); |
|
249 |
writer.append(merging.getParentRankString()).append(";");; |
|
250 |
} |
|
251 |
writer.append('\n'); |
|
252 |
} |
|
253 |
|
|
254 |
private List<Map<UUID,PesiMergeObject>> createMergeObjects(Map<String, Map<UUID, Set<TaxonName>>> names, |
|
255 |
CdmApplicationController appCtr){ |
|
256 |
|
|
257 |
List<Map<UUID,PesiMergeObject>> merge = new ArrayList<>(); |
|
258 |
|
|
259 |
List<String> nameCaches = new ArrayList<>(names.keySet()); |
|
260 |
nameCaches.sort(StringComparator.Instance); |
|
261 |
for (String nameCache: nameCaches){ |
|
262 |
createSingleMergeObject(appCtr, merge, names.get(nameCache)); |
|
263 |
} |
|
264 |
|
|
265 |
return merge; |
|
266 |
} |
|
267 |
|
|
268 |
|
|
269 |
private void createSingleMergeObject(CdmApplicationController appCtr, List<Map<UUID,PesiMergeObject>> merge, |
|
270 |
Map<UUID, Set<TaxonName>> identicalNames) { |
|
271 |
|
|
272 |
Map<UUID,PesiMergeObject> mergeMap = new HashMap<>(); |
|
273 |
|
|
274 |
for (UUID sourceUuid : identicalNames.keySet()){ |
|
275 |
Set<TaxonName> names = identicalNames.get(sourceUuid); |
|
276 |
if (names.isEmpty()){ |
|
277 |
continue; |
|
278 |
} |
|
279 |
TaxonName name = names.iterator().next(); |
|
280 |
String nameAndIdStr = name.getTitleCache() + "; id = " + name.getId(); |
|
281 |
if (names.size()>1){ |
|
282 |
logger.warn("Multiple names per source not yet handled. Take arbitrary one: " + nameAndIdStr); |
|
283 |
} |
|
284 |
|
|
285 |
PesiMergeObject mergeObject = new PesiMergeObject(); |
|
286 |
mergeMap.put(sourceUuid, mergeObject); |
|
287 |
|
|
288 |
Set<TaxonBase> taxonBases = name.getTaxonBases(); |
|
289 |
if (taxonBases.isEmpty()){ |
|
290 |
logger.warn("No taxonbase attached to name. This is not yet handled: " + nameAndIdStr); |
|
291 |
continue; |
|
292 |
} |
|
293 |
if (taxonBases.size() > 1) { |
|
294 |
//TODO: find the two correct names |
|
295 |
logger.warn("Name has not exact 1 but " + taxonBases.size() + " taxon base attached. This is not yet handled. Take arbitrary one."); |
|
296 |
} |
|
297 |
TaxonBase<?> taxonBase = taxonBases.iterator().next(); |
|
298 |
|
|
299 |
//uuid |
|
300 |
mergeObject.setUuidName(name.getUuid().toString()); |
|
301 |
|
|
302 |
//nameCache |
|
303 |
mergeObject.setNameCache(name.getNameCache()); |
|
304 |
|
|
305 |
//authorship |
|
306 |
mergeObject.setAuthor(name.getAuthorshipCache()); |
|
307 |
|
|
308 |
//rank |
|
309 |
mergeObject.setRank(name.getRank().getLabel()); |
|
310 |
|
|
311 |
//Phylum |
|
312 |
TaxonNodeDto phylum = null; |
|
313 |
if (name.getRank().equals(Rank.PHYLUM())) { |
|
314 |
Taxon taxon = getAcceptedTaxon(name); |
|
315 |
if (taxon != null) { |
|
316 |
if (taxon.getTaxonNodes().size()>1){ |
|
317 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
318 |
} |
|
319 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
320 |
phylum = new TaxonNodeDto(node); |
|
321 |
} |
|
322 |
|
|
323 |
} |
|
324 |
if (phylum == null && !name.getRank().isHigher(Rank.PHYLUM())){ |
|
325 |
Taxon taxon = getAcceptedTaxon(name); |
|
326 |
if (!taxon.getTaxonNodes().isEmpty()){ |
|
327 |
if (taxon.getTaxonNodes().size()>1){ |
|
328 |
logger.warn("More than 1 node not yet handled for getPhylum. Take arbitrary one."); |
|
329 |
} |
|
330 |
TaxonNode node = taxon.getTaxonNodes().iterator().next(); |
|
331 |
phylum = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(node.getClassification(), Rank.PHYLUM(), name); |
|
332 |
} |
|
333 |
} |
|
334 |
mergeObject.setPhylum(phylum); |
|
335 |
|
|
336 |
//idInSource |
|
337 |
Iterator<IdentifiableSource> sources = name.getSources().iterator(); |
|
338 |
//TODO idInSource - what if multiple sources exist? |
|
339 |
if (sources.hasNext()){ |
|
340 |
IdentifiableSource source = sources.next(); |
|
341 |
String idInSource = source.getIdInSource(); |
|
342 |
mergeObject.setIdInSource(idInSource); |
|
343 |
} |
|
344 |
|
|
345 |
//status and parent |
|
346 |
Set<Taxon> taxa = name.getTaxa(); |
|
347 |
if (!taxa.isEmpty()){ |
|
348 |
mergeObject.setStatus(true); |
|
349 |
Iterator<Taxon> taxaIterator = taxa.iterator(); |
|
350 |
Taxon taxon = null; |
|
351 |
while (taxaIterator.hasNext()){ |
|
352 |
taxon = taxaIterator.next(); |
|
353 |
if (!taxon.isMisapplication()){ |
|
354 |
break; |
|
355 |
} |
|
356 |
} |
|
357 |
@SuppressWarnings("null") |
|
358 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
359 |
Iterator<TaxonNode> taxonNodeIterator = nodes.iterator(); |
|
360 |
TaxonNode parentNode = null; |
|
361 |
while (taxonNodeIterator.hasNext()){ |
|
362 |
TaxonNode node = taxonNodeIterator.next(); |
|
363 |
if (!node.isTopmostNode()){ |
|
364 |
parentNode = node.getParent(); |
|
365 |
} |
|
366 |
} |
|
367 |
//TODO: ändern mit erweitertem Initializer.. |
|
368 |
if (parentNode != null){ |
|
369 |
TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName()); |
|
370 |
String parentNameCache = parentName.getNameCache(); |
|
371 |
mergeObject.setParentString(parentNameCache); |
|
372 |
mergeObject.setParentRankString(parentName.getRank().getLabel()); |
|
373 |
} |
|
374 |
}else{ |
|
375 |
mergeObject.setStatus(false); |
|
376 |
TaxonNode parentNode = getAcceptedNode(name); |
|
377 |
//TODO: ändern mit erweitertem Initializer.. |
|
378 |
if (parentNode != null){ |
|
379 |
TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName()); |
|
380 |
String parentNameCache = parentName.getNameCache(); |
|
381 |
mergeObject.setParentString(parentNameCache); |
|
382 |
mergeObject.setParentRankString(parentName.getRank().getLabel()); |
|
383 |
} |
|
384 |
} |
|
385 |
} |
|
386 |
|
|
387 |
|
|
388 |
//set parent informations |
|
389 |
|
|
390 |
/* |
|
391 |
Set<HybridRelationship> parentRelations = zooName.getParentRelationships(); |
|
392 |
Iterator parentIterator = parentRelations.iterator(); |
|
393 |
HybridRelationship parentRel; |
|
394 |
ZoologicalName parentName; |
|
395 |
while (parentIterator.hasNext()){ |
|
396 |
parentRel = (HybridRelationship)parentIterator.next(); |
|
397 |
parentName = (ZoologicalName)parentRel.getParentName(); |
|
398 |
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel()); |
|
399 |
mergeObject.setParentStringInErms(parentName.getNameCache()); |
|
400 |
} |
|
401 |
|
|
402 |
parentRelations = zooName2.getParentRelationships(); |
|
403 |
parentIterator = parentRelations.iterator(); |
|
404 |
|
|
405 |
while (parentIterator.hasNext()){ |
|
406 |
parentRel = (HybridRelationship)parentIterator.next(); |
|
407 |
parentName = (ZoologicalName)parentRel.getParentName(); |
|
408 |
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel()); |
|
409 |
mergeObject.setParentStringInFaunaEu(parentName.getNameCache()); |
|
410 |
}*/ |
|
411 |
|
|
412 |
merge.add(mergeMap); |
|
413 |
} |
|
414 |
|
|
415 |
private TaxonNode getAcceptedNode(TaxonName ermsName) { |
|
416 |
Set<TaxonBase> taxonBases = ermsName.getTaxonBases(); |
|
417 |
Taxon taxon = null; |
|
418 |
if (taxonBases != null && !taxonBases.isEmpty()) { |
|
419 |
TaxonBase<?> taxonBase = taxonBases.iterator().next(); |
|
420 |
if (taxonBase instanceof Synonym) { |
|
421 |
taxon = ((Synonym)taxonBase).getAcceptedTaxon(); |
|
422 |
} |
|
423 |
} |
|
424 |
|
|
425 |
Set<TaxonNode> nodes = taxon.getTaxonNodes(); |
|
426 |
|
|
427 |
TaxonNode parentNode = null; |
|
428 |
if (nodes != null && !nodes.isEmpty()) { |
|
429 |
parentNode = nodes.iterator().next(); |
|
430 |
} |
|
431 |
return parentNode; |
|
432 |
} |
|
433 |
|
|
434 |
private Taxon getAcceptedTaxon(TaxonName name) { |
|
435 |
Taxon taxon = null; |
|
436 |
//prefer accepted taxon |
|
437 |
if (name.getTaxa() != null && !name.getTaxa().isEmpty()){ |
|
438 |
taxon = name.getTaxa().iterator().next(); |
|
439 |
//else take synonym |
|
440 |
}else if (name.getTaxonBases() != null && !name.getTaxonBases().isEmpty()){ |
|
441 |
TaxonBase<?> taxonBase = name.getTaxonBases().iterator().next(); |
|
442 |
if (taxonBase instanceof Synonym) { |
|
443 |
Synonym syn = (Synonym)taxonBase; |
|
444 |
taxon = syn.getAcceptedTaxon(); |
|
445 |
} |
|
446 |
} |
|
447 |
return taxon; |
|
448 |
} |
|
449 |
|
|
450 |
public static void main(String[] args) { |
|
451 |
PesiFindIdenticalNamesActivator activator = new PesiFindIdenticalNamesActivator(); |
|
452 |
activator.invoke(pesiSource); |
|
453 |
System.exit(0); |
|
454 |
} |
|
455 |
} |
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/merging/PesiMergeObject.java | ||
---|---|---|
4 | 4 |
|
5 | 5 |
public class PesiMergeObject { |
6 | 6 |
|
7 |
private String uuidFaunaEu; |
|
8 |
private String uuidErms; |
|
7 |
private String uuidName; |
|
9 | 8 |
|
10 |
private String idInFaunaEu; |
|
11 |
private String idInErms; |
|
9 |
private String idInSource; |
|
12 | 10 |
|
13 |
private String nameCacheInFaunaEu; |
|
14 |
private String nameCacheInErms; |
|
11 |
private String nameCache; |
|
15 | 12 |
|
16 |
private boolean statInFaunaEu; |
|
17 |
private boolean statInErms; |
|
13 |
private boolean status; |
|
18 | 14 |
|
19 |
private String authorInFaunaEu; |
|
20 |
private String authorInErms; |
|
15 |
private String author; |
|
21 | 16 |
|
22 |
private String rankInFaunaEu; |
|
23 |
private String rankInErms; |
|
17 |
private String rank; |
|
24 | 18 |
|
25 |
private TaxonNodeDto phylumInFaunaEu; |
|
26 |
private TaxonNodeDto phylumInErms; |
|
19 |
private TaxonNodeDto phylum; |
|
27 | 20 |
|
28 |
private String parentStringInFaunaEu; |
|
29 |
private String parentStringInErms; |
|
21 |
private String parentString; |
|
30 | 22 |
|
31 |
private String parentRankStringInFaunaEu; |
|
32 |
private String parentRankStringInErms; |
|
23 |
private String parentRankString; |
|
33 | 24 |
|
34 | 25 |
|
35 |
public String getUuidFaunaEu() { |
|
36 |
return uuidFaunaEu; |
|
37 |
} |
|
38 |
public void setUuidFaunaEu(String uuidFaunaEu) { |
|
39 |
this.uuidFaunaEu = uuidFaunaEu; |
|
40 |
} |
|
26 |
//************************ FACTORY *******************/ |
|
41 | 27 |
|
42 |
public String getUuidErms() { |
|
43 |
return uuidErms; |
|
44 |
} |
|
28 |
public static PesiMergeObject newInstance(){ |
|
29 |
return new PesiMergeObject(); |
|
45 | 30 |
|
46 |
public void setUuidErms(String uuidErms) { |
|
47 |
this.uuidErms = uuidErms; |
|
48 |
} |
|
31 |
} |
|
49 | 32 |
|
50 |
public String getParentRankStringInFaunaEu() { |
|
51 |
return parentRankStringInFaunaEu; |
|
52 |
} |
|
33 |
//************************* GETTER/SETTER **********************/ |
|
53 | 34 |
|
54 |
public void setParentRankStringInFaunaEu(String parentRankStringInFaunaEu) {
|
|
55 |
this.parentRankStringInFaunaEu = parentRankStringInFaunaEu;
|
|
35 |
public String getUuidName() {
|
|
36 |
return uuidName;
|
|
56 | 37 |
} |
57 |
|
|
58 |
public String getParentRankStringInErms() { |
|
59 |
return parentRankStringInErms; |
|
38 |
public void setUuidName(String uuidName) { |
|
39 |
this.uuidName = uuidName; |
|
60 | 40 |
} |
61 | 41 |
|
62 |
public void setParentRankStringInErms(String parentRankStringInErms) {
|
|
63 |
this.parentRankStringInErms = parentRankStringInErms;
|
|
42 |
public String getParentRankString() {
|
|
43 |
return parentRankString;
|
|
64 | 44 |
} |
65 |
|
|
66 |
public String getParentStringInFaunaEu() { |
|
67 |
return parentStringInFaunaEu; |
|
45 |
public void setParentRankString(String parentRankString) { |
|
46 |
this.parentRankString = parentRankString; |
|
68 | 47 |
} |
69 | 48 |
|
70 |
public void setParentStringInFaunaEu(String parentStringInFaunaEu) {
|
|
71 |
this.parentStringInFaunaEu = parentStringInFaunaEu;
|
|
49 |
public String getParentString() {
|
|
50 |
return parentString;
|
|
72 | 51 |
} |
73 |
|
|
74 |
public String getParentStringInErms() { |
|
75 |
return parentStringInErms; |
|
52 |
public void setParentString(String parentString) { |
|
53 |
this.parentString = parentString; |
|
76 | 54 |
} |
77 | 55 |
|
78 |
public void setParentStringInErms(String parentStringInErms) {
|
|
79 |
this.parentStringInErms = parentStringInErms;
|
|
56 |
public String getRank() {
|
|
57 |
return rank;
|
|
80 | 58 |
} |
81 |
|
|
82 |
public String getRankInFaunaEu() { |
|
83 |
return rankInFaunaEu; |
|
59 |
public void setRank(String rank) { |
|
60 |
this.rank = rank; |
|
84 | 61 |
} |
85 | 62 |
|
86 |
public void setRankInFaunaEu(String rankInFaunaEu) {
|
|
87 |
this.rankInFaunaEu = rankInFaunaEu;
|
|
63 |
public TaxonNodeDto getPhylum() {
|
|
64 |
return phylum;
|
|
88 | 65 |
} |
89 |
|
|
90 |
public TaxonNodeDto getPhylumInFaunaEu() { |
|
91 |
return phylumInFaunaEu; |
|
66 |
public void setPhylum(TaxonNodeDto phylum) { |
|
67 |
this.phylum = phylum; |
|
92 | 68 |
} |
93 | 69 |
|
94 |
public void setPhylumInFaunaEu(TaxonNodeDto phylumInFaunaEu) {
|
|
95 |
this.phylumInFaunaEu = phylumInFaunaEu;
|
|
70 |
public boolean isStatus() {
|
|
71 |
return status;
|
|
96 | 72 |
} |
97 |
|
|
98 |
public TaxonNodeDto getPhylumInErms() { |
|
99 |
return phylumInErms; |
|
73 |
public void setStatus(boolean status) { |
|
74 |
this.status = status; |
|
100 | 75 |
} |
101 | 76 |
|
102 |
public void setPhylumInErms(TaxonNodeDto phylumInErms) {
|
|
103 |
this.phylumInErms = phylumInErms;
|
|
77 |
public String getAuthor() {
|
|
78 |
return author;
|
|
104 | 79 |
} |
105 |
|
|
106 |
public String getRankInErms() { |
|
107 |
return rankInErms; |
|
80 |
public void setAuthor(String author) { |
|
81 |
this.author = author; |
|
108 | 82 |
} |
109 | 83 |
|
110 |
public void setRankInErms(String rankInErms) {
|
|
111 |
this.rankInErms = rankInErms;
|
|
84 |
public String getIdInSource() {
|
|
85 |
return idInSource;
|
|
112 | 86 |
} |
113 |
|
|
114 |
|
|
115 |
public static PesiMergeObject newInstance(){ |
|
116 |
return new PesiMergeObject(); |
|
117 |
|
|
87 |
public void setIdInSource(String idInSource) { |
|
88 |
this.idInSource = idInSource; |
|
118 | 89 |
} |
119 | 90 |
|
120 |
public boolean isStatInFaunaEu() { |
|
121 |
return statInFaunaEu; |
|
122 |
} |
|
123 |
public void setStatInFaunaEu(boolean statInFaunaEu) { |
|
124 |
this.statInFaunaEu = statInFaunaEu; |
|
91 |
public String getNameCache() { |
|
92 |
return nameCache; |
|
125 | 93 |
} |
126 |
public boolean isStatInErms() {
|
|
127 |
return statInErms;
|
|
94 |
public void setNameCache(String nameCache) {
|
|
95 |
this.nameCache = nameCache;
|
|
128 | 96 |
} |
129 |
public void setStatInErms(boolean statInErms) { |
|
130 |
this.statInErms = statInErms; |
|
131 |
} |
|
132 |
public String getAuthorInFaunaEu() { |
|
133 |
return authorInFaunaEu; |
|
134 |
} |
|
135 |
public void setAuthorInFaunaEu(String authorInFaunaEu) { |
|
136 |
this.authorInFaunaEu = authorInFaunaEu; |
|
137 |
} |
|
138 |
public String getAuthorInErms() { |
|
139 |
return authorInErms; |
|
140 |
} |
|
141 |
public void setAuthorInErms(String authorInErms) { |
|
142 |
this.authorInErms = authorInErms; |
|
143 |
} |
|
144 |
public String getIdInFaunaEu() { |
|
145 |
return idInFaunaEu; |
|
146 |
} |
|
147 |
public void setIdInFaunaEu(String idInFaunaEu) { |
|
148 |
this.idInFaunaEu = idInFaunaEu; |
|
149 |
} |
|
150 |
public String getIdInErms() { |
|
151 |
return idInErms; |
|
152 |
} |
|
153 |
public void setIdInErms(String idInErms) { |
|
154 |
this.idInErms = idInErms; |
|
155 |
} |
|
156 |
public String getNameCacheInFaunaEu() { |
|
157 |
return nameCacheInFaunaEu; |
|
158 |
} |
|
159 |
public void setNameCacheInFaunaEu(String nameCacheInFaunaEu) { |
|
160 |
this.nameCacheInFaunaEu = nameCacheInFaunaEu; |
|
161 |
} |
|
162 |
public String getNameCacheInErms() { |
|
163 |
return nameCacheInErms; |
|
164 |
} |
|
165 |
public void setNameCacheInErms(String nameCacheInErms) { |
|
166 |
this.nameCacheInErms = nameCacheInErms; |
|
167 |
} |
|
168 |
|
|
169 |
|
|
170 |
|
|
171 | 97 |
} |
Also available in: Unified diff
ref #1447 improve PesiFindIdenticalNamesActivator