1
|
package eu.etaxonomy.cdm.app.pesi.merging;
|
2
|
|
3
|
import java.io.File;
|
4
|
import java.io.FileOutputStream;
|
5
|
import java.io.IOException;
|
6
|
import java.io.OutputStreamWriter;
|
7
|
import java.io.Writer;
|
8
|
import java.lang.reflect.InvocationTargetException;
|
9
|
import java.lang.reflect.Method;
|
10
|
import java.nio.charset.StandardCharsets;
|
11
|
import java.util.ArrayList;
|
12
|
import java.util.Arrays;
|
13
|
import java.util.HashMap;
|
14
|
import java.util.HashSet;
|
15
|
import java.util.Iterator;
|
16
|
import java.util.List;
|
17
|
import java.util.Map;
|
18
|
import java.util.Set;
|
19
|
import java.util.UUID;
|
20
|
|
21
|
import org.apache.commons.lang3.StringUtils;
|
22
|
import org.apache.log4j.Logger;
|
23
|
import org.springframework.transaction.TransactionStatus;
|
24
|
|
25
|
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
|
26
|
import eu.etaxonomy.cdm.app.common.CdmDestinations;
|
27
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
28
|
import eu.etaxonomy.cdm.common.StringComparator;
|
29
|
import eu.etaxonomy.cdm.database.DbSchemaValidation;
|
30
|
import eu.etaxonomy.cdm.database.ICdmDataSource;
|
31
|
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
|
32
|
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
|
33
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
34
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
35
|
import eu.etaxonomy.cdm.model.name.Rank;
|
36
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
37
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
38
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
39
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
40
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
41
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
|
42
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
|
43
|
import eu.etaxonomy.cdm.persistence.dto.TaxonNodeDto;
|
44
|
|
45
|
/**
|
46
|
* Finds taxa with identical {@link TaxonName#getNameCache() name cache} but from different
|
47
|
* sources (import source) and writes them into multiple csv files.
|
48
|
* All cases are stored in file xxx_namesAll, some prefiltered files are created for e.g.
|
49
|
* those having different parents or different authors.
|
50
|
* Taxa are pairwise compared. If a name appears in 3 sources for each of the 3 pairs 1 record
|
51
|
* is created below each other. Also if a name appears multiple times (e.g. homonyms) in 1
|
52
|
* DB and 1 time in another. Each of the multiple names is compared to the other databases
|
53
|
* record.
|
54
|
* <BR><BR>
|
55
|
*
|
56
|
* TODO is it necessary to create these extra files? Filters can also be applied in Excel.
|
57
|
*
|
58
|
* @author a.mueller
|
59
|
* @since 22.01.2020
|
60
|
*/
|
61
|
public class PesiFindIdenticalNamesActivator {
|
62
|
|
63
|
/** Logger of this class. */
private static final Logger logger = Logger.getLogger(PesiFindIdenticalNamesActivator.class);

//static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2();
/** Data source handed to {@code invoke} by {@code main}. */
static final ICdmDataSource pesiSource = CdmDestinations.cdm_pesi2019_final();

/**
 * Common prefix of all csv files written by this class:
 * {@code <user.home>/.cdmLibrary/pesi/pesimerge}.
 */
static final String path = String.join(File.separator,
        System.getProperty("user.home"), ".cdmLibrary", "pesi", "pesimerge");

// uuids of the source references, taken from the PesiTransformer constants
private static UUID emSourceUuid = PesiTransformer.uuidSourceRefEuroMed;
private static UUID ermsSourceUuid = PesiTransformer.uuidSourceRefErms;
private static UUID faunaEuSourceUuid = PesiTransformer.uuidSourceRefFaunaEuropaea;
private static UUID ifSourceUuid = PesiTransformer.uuidSourceRefIndexFungorum;

/** The source reference uuids passed to the taxon service when searching identical names. */
private static List<UUID> sourceRefUuids = new ArrayList<>();

/** Short label per source uuid, used for the "src" columns of the csv files. */
private static Map<UUID,String> sourcesLabels = new HashMap<>();

static {
    sourceRefUuids.add(emSourceUuid);
    sourceRefUuids.add(ermsSourceUuid);
    sourceRefUuids.add(faunaEuSourceUuid);
    sourceRefUuids.add(ifSourceUuid);

    sourcesLabels.put(emSourceUuid, "E+M");
    sourcesLabels.put(ermsSourceUuid, "ERMS");
    sourcesLabels.put(faunaEuSourceUuid, "FauEu");
    sourcesLabels.put(ifSourceUuid, "IF");
}
|
84
|
|
85
|
private void invoke(ICdmDataSource source){
|
86
|
|
87
|
CdmApplicationController app = CdmIoApplicationController.NewInstance(source, DbSchemaValidation.VALIDATE, false);
|
88
|
|
89
|
List<String> propertyPaths = new ArrayList<>();
|
90
|
propertyPaths.add("sources.*");
|
91
|
propertyPaths.add("sources.idInSource");
|
92
|
propertyPaths.add("sources.idNamespace");
|
93
|
propertyPaths.add("taxonBases.*");
|
94
|
propertyPaths.add("taxonBases.relationsFromThisTaxon");
|
95
|
propertyPaths.add("taxonBases.taxonNodes.*");
|
96
|
propertyPaths.add("taxonBases.taxonNodes.parent.*");
|
97
|
propertyPaths.add("taxonBases.taxonNodes.childNodes.*");
|
98
|
propertyPaths.add("taxonBases.taxonNodes.childNodes.classification.rootNode.childNodes.*");
|
99
|
propertyPaths.add("taxonBases.taxonNodes.parent.taxon.name.*");
|
100
|
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.*");
|
101
|
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.*");
|
102
|
propertyPaths.add("taxonBases.acceptedTaxon.taxonNodes.childNodes.classification.rootNode.childNodes.*");
|
103
|
System.out.println("Start getIdenticalNames...");
|
104
|
|
105
|
Map<String, Map<UUID, Set<TaxonName>>> namesOfIdenticalTaxa;
|
106
|
TransactionStatus tx = app.startTransaction(true);
|
107
|
try {
|
108
|
namesOfIdenticalTaxa = app.getTaxonService().findIdenticalTaxonNames(sourceRefUuids, propertyPaths);
|
109
|
} catch (Exception e) {
|
110
|
e.printStackTrace();
|
111
|
return;
|
112
|
}
|
113
|
System.out.println("Start creating merging objects");
|
114
|
List<Map<UUID, List<PesiMergeObject>>> mergingObjects = createMergeObjects(namesOfIdenticalTaxa, app);
|
115
|
app.commitTransaction(tx);
|
116
|
|
117
|
boolean resultOK = true;
|
118
|
System.out.println("Start creating csv files");
|
119
|
resultOK &= writeSameNamesToCsvFile(mergingObjects, path + "_namesAll.csv");
|
120
|
resultOK &= writeSameNamesDifferentAuthorToCsv(mergingObjects, path + "_authors.csv");
|
121
|
resultOK &= writeSameNamesDifferentStatusToCsv(mergingObjects, path + "_status.csv");
|
122
|
resultOK &= writeSameNamesDifferentPhylumToCsv(mergingObjects, path + "_phylum.csv");
|
123
|
resultOK &= writeSameNamesDifferentParentToCsv(mergingObjects, path + "_parent.csv");
|
124
|
resultOK &= writeSameNamesDifferentRankToCsv(mergingObjects, path + "_rank.csv");
|
125
|
|
126
|
System.out.println("End find identical names for PESI: " + resultOK + ". Results written to " + path);
|
127
|
}
|
128
|
|
129
|
private boolean writeSameNamesToCsvFile(
|
130
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName) {
|
131
|
|
132
|
String header = "same names (all)";
|
133
|
String methodName = null;
|
134
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
135
|
}
|
136
|
|
137
|
private boolean writeSameNamesDifferentPhylumToCsv(
|
138
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName){
|
139
|
|
140
|
String header = "same names but different phylum";
|
141
|
String methodName = "getPhylum";
|
142
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
143
|
}
|
144
|
|
145
|
private boolean writeSameNamesDifferentParentToCsv(
|
146
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName){
|
147
|
|
148
|
String header = "same names but different parent";
|
149
|
String methodName = "getParentString";
|
150
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
151
|
}
|
152
|
|
153
|
private boolean writeSameNamesDifferentRankToCsv(
|
154
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName){
|
155
|
|
156
|
String header = "same names but different rank";
|
157
|
String methodName = "getRank";
|
158
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
159
|
}
|
160
|
|
161
|
private boolean writeSameNamesDifferentStatusToCsv(
|
162
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName){
|
163
|
|
164
|
String header = "same names but different status";
|
165
|
String methodName = "isStatus";
|
166
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
167
|
}
|
168
|
|
169
|
private boolean writeSameNamesDifferentAuthorToCsv(
|
170
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects, String sFileName){
|
171
|
|
172
|
String header = "same names but different author";
|
173
|
String methodName = "getAuthor";
|
174
|
return writeDifference(header, methodName, mergingObjects, sFileName);
|
175
|
}
|
176
|
|
177
|
private boolean writeDifference(String header,
|
178
|
String methodName,
|
179
|
List<Map<UUID,List<PesiMergeObject>>> mergingObjects,
|
180
|
String sFileName) {
|
181
|
|
182
|
try{
|
183
|
Method method = methodName == null? null : PesiMergeObject.class.getMethod(methodName);
|
184
|
|
185
|
Writer writer = new OutputStreamWriter(new FileOutputStream(new File(sFileName)), StandardCharsets.UTF_8);
|
186
|
|
187
|
//create Header
|
188
|
createHeader(writer, header);
|
189
|
|
190
|
//write data
|
191
|
for (Map<UUID,List<PesiMergeObject>> merging : mergingObjects){
|
192
|
boolean isNextNameCache = true;
|
193
|
List<UUID> mySources = new ArrayList<>(merging.keySet());
|
194
|
for (int i = 0; i<mySources.size()-1; i++){
|
195
|
for (int j = i+1; j<mySources.size(); j++){
|
196
|
boolean differenceExists = false;
|
197
|
List<PesiMergeObject> mergeList1 = merging.get(mySources.get(i));
|
198
|
List<PesiMergeObject> mergeList2 = merging.get(mySources.get(j));
|
199
|
for (PesiMergeObject merge1 : mergeList1){
|
200
|
for (PesiMergeObject merge2 : mergeList2){
|
201
|
differenceExists |= isDifferent(merge1, merge2, method);
|
202
|
}
|
203
|
}
|
204
|
if (differenceExists){
|
205
|
for (PesiMergeObject merge1 : mergeList1){
|
206
|
for (PesiMergeObject merge2 : mergeList2){
|
207
|
writeCsvLine(writer, merge1, merge2, method, isNextNameCache);
|
208
|
isNextNameCache = false;
|
209
|
}
|
210
|
}
|
211
|
}
|
212
|
}
|
213
|
}
|
214
|
}
|
215
|
writer.flush();
|
216
|
writer.close();
|
217
|
return true;
|
218
|
}catch(NoSuchMethodException | SecurityException | IOException e){
|
219
|
logger.error(e.getMessage());
|
220
|
return false;
|
221
|
}
|
222
|
}
|
223
|
|
224
|
private boolean isDifferent(PesiMergeObject merge1, PesiMergeObject merge2, Method method){
|
225
|
|
226
|
try {
|
227
|
if (method == null){
|
228
|
return true;
|
229
|
}
|
230
|
Object value1 = method.invoke(merge1);
|
231
|
Object value2 = method.invoke(merge2);
|
232
|
return !CdmUtils.nullSafeEqual(value1, value2);
|
233
|
} catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
|
234
|
e.printStackTrace();
|
235
|
return true;
|
236
|
}
|
237
|
}
|
238
|
|
239
|
private void createHeader(Writer writer, String firstLine){
|
240
|
try {
|
241
|
writer.append(firstLine);
|
242
|
writer.append('\n');
|
243
|
writeHeaderPair(writer, "tid");
|
244
|
writer.append("use;");
|
245
|
writer.append("nameUse;");
|
246
|
writer.append("next;");
|
247
|
writer.append("diff;");
|
248
|
writeHeaderPair(writer, "src");
|
249
|
// writeHeaderPair(writer, "nuuid");
|
250
|
// writeHeaderPair(writer, "idInSource");
|
251
|
writer.append("nameCache;");
|
252
|
writeHeaderPair(writer, "author");
|
253
|
writeHeaderPair(writer, "nom.ref.");
|
254
|
writeHeaderPair(writer, "rank");
|
255
|
writeHeaderPair(writer, "kingdom");
|
256
|
writeHeaderPair(writer, "phylum");
|
257
|
writeHeaderPair(writer, "family");
|
258
|
writeHeaderPair(writer, "parentString");
|
259
|
writeHeaderPair(writer, "parentRankString");
|
260
|
writeHeaderPair(writer, "status");
|
261
|
writeHeaderPair(writer, "tuuid");
|
262
|
|
263
|
writer.append('\n');
|
264
|
} catch (IOException e) {
|
265
|
e.printStackTrace();
|
266
|
}
|
267
|
}
|
268
|
|
269
|
private void writeHeaderPair(Writer writer, String header) throws IOException {
|
270
|
writer.append(header+"1").append(';');
|
271
|
writer.append(header+"2").append(';');
|
272
|
}
|
273
|
|
274
|
private void writeCsvLine(Writer writer,
|
275
|
PesiMergeObject merge1, PesiMergeObject merge2,
|
276
|
Method method, boolean isNextNameCache){
|
277
|
|
278
|
writePair(writer, merge1, merge2, "IdTaxon", Compare.NO);
|
279
|
writeSingleValue(writer, "");
|
280
|
writeSingleValue(writer, "");
|
281
|
writeSingleValue(writer, isNextNameCache?"1":"0");
|
282
|
boolean different = isDifferent(merge1, merge2, method);
|
283
|
writeSingleValue(writer, different?"1":"0");
|
284
|
writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge1.getUuidSource())));
|
285
|
writeSingleValue(writer, sourcesLabels.get(UUID.fromString(merge2.getUuidSource())));
|
286
|
// writePair(writer, merge1, merge2, "UuidName");
|
287
|
// writePair(writer, merge1, merge2, "IdInSource");
|
288
|
writeSingleValue(writer, merge1.getNameCache());
|
289
|
// writePair(writer, merge1, merge2, "NameCache");
|
290
|
writePair(writer, merge1, merge2, "Author", Compare.YES);
|
291
|
writePair(writer, merge1, merge2, "NomenclaturalReference", Compare.YES);
|
292
|
writePair(writer, merge1, merge2, "Rank", Compare.YES);
|
293
|
writePair(writer, merge1, merge2, "KingdomCache", Compare.KEEP_FIRST);
|
294
|
writePair(writer, merge1, merge2, "PhylumCache", Compare.YES);
|
295
|
writePair(writer, merge1, merge2, "FamilyCache", Compare.YES);
|
296
|
writePair(writer, merge1, merge2, "ParentString", Compare.YES);
|
297
|
writePair(writer, merge1, merge2, "ParentRankString", Compare.YES);
|
298
|
writePair(writer, merge1, merge2, "StatusStr", Compare.YES);
|
299
|
writePair(writer, merge1, merge2, "UuidTaxon", Compare.YES);
|
300
|
|
301
|
try {
|
302
|
writer.append('\n');
|
303
|
} catch (IOException e) {
|
304
|
e.printStackTrace();
|
305
|
}
|
306
|
}
|
307
|
|
308
|
private void writeSingleValue(Writer writer, String value) {
|
309
|
try {
|
310
|
writer.append(value).append(";");
|
311
|
} catch (Exception e) {
|
312
|
e.printStackTrace();
|
313
|
}
|
314
|
}
|
315
|
|
316
|
private enum Compare{
|
317
|
NO,
|
318
|
YES,
|
319
|
KEEP_FIRST;
|
320
|
|
321
|
boolean isAnyCompare(){
|
322
|
return this == NO;
|
323
|
}
|
324
|
}
|
325
|
|
326
|
private void writePair(Writer writer, PesiMergeObject merge1, PesiMergeObject merge2, String methodName, Compare compare) {
|
327
|
try {
|
328
|
Method method = PesiMergeObject.class.getDeclaredMethod("get"+methodName);
|
329
|
String value1 = (String) method.invoke(merge1);
|
330
|
String value2 = (String) method.invoke(merge2);
|
331
|
if (compare.isAnyCompare() && CdmUtils.nullSafeEqual(value1, value2)){
|
332
|
value2 = StringUtils.isBlank(value2)? "":"-";
|
333
|
if (compare == Compare.YES){
|
334
|
value1 = value2;
|
335
|
}
|
336
|
}
|
337
|
writer.append(normalize(value1)).append(";");
|
338
|
writer.append(normalize(value2)).append(";");
|
339
|
} catch (Exception e) {
|
340
|
e.printStackTrace();
|
341
|
}
|
342
|
}
|
343
|
|
344
|
private String normalize(String val) {
|
345
|
return CdmUtils.Nz(val).replace(";", "@");
|
346
|
}
|
347
|
|
348
|
private List<Map<UUID,List<PesiMergeObject>>> createMergeObjects(
|
349
|
Map<String, Map<UUID, Set<TaxonName>>> identicalNames,
|
350
|
CdmApplicationController appCtr){
|
351
|
|
352
|
List<Map<UUID,List<PesiMergeObject>>> merge = new ArrayList<>();
|
353
|
|
354
|
List<String> nameCaches = new ArrayList<>(identicalNames.keySet());
|
355
|
nameCaches.sort(StringComparator.Instance);
|
356
|
for (String nameCache: nameCaches){
|
357
|
createSingleMergeObject(appCtr, merge, identicalNames.get(nameCache));
|
358
|
}
|
359
|
return merge;
|
360
|
}
|
361
|
|
362
|
private void createSingleMergeObject(CdmApplicationController appCtr,
|
363
|
List<Map<UUID,List<PesiMergeObject>>> merge,
|
364
|
Map<UUID, Set<TaxonName>> identicalNames) {
|
365
|
|
366
|
Map<UUID,List<PesiMergeObject>> mergeMap = new HashMap<>();
|
367
|
|
368
|
for (UUID sourceUuid : identicalNames.keySet()){
|
369
|
Set<TaxonName> names = identicalNames.get(sourceUuid);
|
370
|
List<PesiMergeObject> pmoList = new ArrayList<>();
|
371
|
mergeMap.put(sourceUuid, pmoList);
|
372
|
|
373
|
for (TaxonName name : names){
|
374
|
String nameAndIdStr = name.getTitleCache() + "; id = " + name.getId();
|
375
|
@SuppressWarnings("rawtypes")
|
376
|
Set<TaxonBase> taxonBases = name.getTaxonBases();
|
377
|
if (taxonBases.isEmpty()){
|
378
|
logger.warn("No taxonbase attached to name. This is not yet handled: " + nameAndIdStr);
|
379
|
continue;
|
380
|
}
|
381
|
for (TaxonBase<?> taxonBase : taxonBases) {
|
382
|
if (!taxonBase.isPublish()){
|
383
|
continue;
|
384
|
}
|
385
|
PesiMergeObject mergeObject = PesiMergeObject.NewInstance();
|
386
|
pmoList.add(mergeObject);
|
387
|
|
388
|
//uuid
|
389
|
mergeObject.setUuidSource(sourceUuid.toString());
|
390
|
mergeObject.setUuidName(name.getUuid().toString());
|
391
|
mergeObject.setUuidTaxon(taxonBase.getUuid().toString());
|
392
|
mergeObject.setIdTaxon(String.valueOf(taxonBase.getId()));
|
393
|
|
394
|
//nameCache
|
395
|
mergeObject.setNameCache(name.getNameCache());
|
396
|
|
397
|
//authorship
|
398
|
mergeObject.setAuthor(name.getAuthorshipCache());
|
399
|
|
400
|
//nom.ref.
|
401
|
mergeObject.setNomenclaturalReference(name.getNomenclaturalReference()== null?null: name.getNomenclaturalReference().getAbbrevTitleCache());
|
402
|
|
403
|
//rank
|
404
|
mergeObject.setRank(name.getRank().getLabel());
|
405
|
|
406
|
//Kingdom
|
407
|
TaxonNodeDto kingdom = getHigherTaxon(appCtr, name, Rank.KINGDOM());
|
408
|
mergeObject.setKingdom(kingdom);
|
409
|
|
410
|
//Phylum/Division
|
411
|
TaxonNodeDto phylum = getHigherTaxon(appCtr, name, Rank.PHYLUM());
|
412
|
if(phylum == null){
|
413
|
phylum = getHigherTaxon(appCtr, name, Rank.DIVISION());
|
414
|
}
|
415
|
mergeObject.setPhylum(phylum);
|
416
|
|
417
|
//Family
|
418
|
TaxonNodeDto family = getHigherTaxon(appCtr, name, Rank.FAMILY());
|
419
|
mergeObject.setFamily(family);
|
420
|
|
421
|
//idInSource
|
422
|
Iterator<IdentifiableSource> sources = name.getSources().iterator();
|
423
|
//TODO idInSource - what if multiple sources exist?
|
424
|
if (sources.hasNext()){
|
425
|
IdentifiableSource source = sources.next();
|
426
|
String idInSource = source.getIdInSource();
|
427
|
mergeObject.setIdInSource(idInSource);
|
428
|
}
|
429
|
|
430
|
//status and parent
|
431
|
makeStatusAndParent(name, mergeObject);
|
432
|
}
|
433
|
}
|
434
|
}
|
435
|
|
436
|
merge.add(mergeMap);
|
437
|
|
438
|
|
439
|
//set parent informations
|
440
|
|
441
|
/*
|
442
|
Set<HybridRelationship> parentRelations = zooName.getParentRelationships();
|
443
|
Iterator parentIterator = parentRelations.iterator();
|
444
|
HybridRelationship parentRel;
|
445
|
ZoologicalName parentName;
|
446
|
while (parentIterator.hasNext()){
|
447
|
parentRel = (HybridRelationship)parentIterator.next();
|
448
|
parentName = (ZoologicalName)parentRel.getParentName();
|
449
|
mergeObject.setParentRankStringInErms(parentName.getRank().getLabel());
|
450
|
mergeObject.setParentStringInErms(parentName.getNameCache());
|
451
|
}
|
452
|
|
453
|
parentRelations = zooName2.getParentRelationships();
|
454
|
parentIterator = parentRelations.iterator();
|
455
|
|
456
|
while (parentIterator.hasNext()){
|
457
|
parentRel = (HybridRelationship)parentIterator.next();
|
458
|
parentName = (ZoologicalName)parentRel.getParentName();
|
459
|
mergeObject.setParentRankStringInFaunaEu(parentName.getRank().getLabel());
|
460
|
mergeObject.setParentStringInFaunaEu(parentName.getNameCache());
|
461
|
}*/
|
462
|
|
463
|
|
464
|
}
|
465
|
|
466
|
private void makeStatusAndParent(TaxonName name, PesiMergeObject mergeObject) {
|
467
|
Set<Taxon> taxa = name.getTaxa();
|
468
|
taxa = getReallyAcceptedTaxa(taxa);
|
469
|
if (!taxa.isEmpty()){
|
470
|
mergeObject.setStatus(true);
|
471
|
Iterator<Taxon> taxaIterator = taxa.iterator();
|
472
|
Taxon taxon = null;
|
473
|
while (taxaIterator.hasNext()){
|
474
|
taxon = taxaIterator.next();
|
475
|
if (!taxon.isMisapplication()){
|
476
|
break;
|
477
|
}
|
478
|
}
|
479
|
@SuppressWarnings("null")
|
480
|
Set<TaxonNode> nodes = taxon.getTaxonNodes();
|
481
|
Iterator<TaxonNode> taxonNodeIterator = nodes.iterator();
|
482
|
TaxonNode parentNode = null;
|
483
|
while (taxonNodeIterator.hasNext()){
|
484
|
TaxonNode node = taxonNodeIterator.next();
|
485
|
if (!node.isTopmostNode()){
|
486
|
parentNode = node.getParent();
|
487
|
}
|
488
|
}
|
489
|
if (parentNode != null){
|
490
|
TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
|
491
|
String parentNameCache = parentName.getNameCache();
|
492
|
mergeObject.setParentString(parentNameCache);
|
493
|
mergeObject.setParentRankString(parentName.getRank().getLabel());
|
494
|
}
|
495
|
}else{
|
496
|
mergeObject.setStatus(false);
|
497
|
TaxonNode parentNode = getAcceptedNode(name);
|
498
|
if (parentNode != null){
|
499
|
TaxonName parentName = CdmBase.deproxy(parentNode.getTaxon().getName());
|
500
|
String parentNameCache = parentName.getNameCache();
|
501
|
mergeObject.setParentString(parentNameCache);
|
502
|
mergeObject.setParentRankString(parentName.getRank().getLabel());
|
503
|
}
|
504
|
}
|
505
|
}
|
506
|
|
507
|
private TaxonNodeDto getHigherTaxon(CdmApplicationController appCtr, TaxonName name, Rank rank) {
|
508
|
if (name.getRank().equals(rank)) {
|
509
|
Taxon taxon = getAcceptedTaxon(name);
|
510
|
if (taxon != null) {
|
511
|
if (taxon.getTaxonNodes().isEmpty()){
|
512
|
return null; //probably MAN
|
513
|
}
|
514
|
if (taxon.getTaxonNodes().size()>1){
|
515
|
logger.warn("More than 1 node not yet handled for getHigherTaxon. Take arbitrary one.");
|
516
|
}
|
517
|
TaxonNode node = taxon.getTaxonNodes().iterator().next();
|
518
|
return new TaxonNodeDto(node);
|
519
|
}
|
520
|
}
|
521
|
if (name.getRank().isHigher(rank)){
|
522
|
return null;
|
523
|
}else{
|
524
|
Taxon taxon = getAcceptedTaxon(name);
|
525
|
if (taxon.getTaxonNodes().isEmpty()){
|
526
|
return null;
|
527
|
}else{
|
528
|
if (taxon.getTaxonNodes().size()>1){
|
529
|
logger.warn("More than 1 node not yet handled for getHigherTaxon. Take arbitrary one.");
|
530
|
}
|
531
|
TaxonNode node = taxon.getTaxonNodes().iterator().next();
|
532
|
List<TaxonNodeDto> higherDtos = appCtr.getTaxonNodeService().taxonNodeDtoParentRank(node.getClassification(), rank, taxon);
|
533
|
if (higherDtos.isEmpty()){
|
534
|
return null;
|
535
|
}else {
|
536
|
if (higherDtos.size() > 1){
|
537
|
logger.warn("More than 1 higher dto. This is not yet implemented: " + taxon.getTitleCache());
|
538
|
}
|
539
|
return higherDtos.get(0);
|
540
|
}
|
541
|
}
|
542
|
}
|
543
|
}
|
544
|
|
545
|
private TaxonNode getAcceptedNode(TaxonName ermsName) {
|
546
|
TaxonNode parentNode = null;
|
547
|
Set<TaxonBase> taxonBases = ermsName.getTaxonBases();
|
548
|
if (!taxonBases.isEmpty()) {
|
549
|
Taxon taxon = null;
|
550
|
TaxonBase<?> taxonBase = taxonBases.iterator().next();
|
551
|
if (taxonBase instanceof Synonym) {
|
552
|
taxon = ((Synonym)taxonBase).getAcceptedTaxon();
|
553
|
}else{
|
554
|
taxon = getAccTaxonForTaxonSynonym((Taxon)taxonBase);
|
555
|
}
|
556
|
Set<TaxonNode> nodes = taxon.getTaxonNodes();
|
557
|
if (!nodes.isEmpty()) {
|
558
|
parentNode = nodes.iterator().next();
|
559
|
}
|
560
|
}
|
561
|
|
562
|
return parentNode;
|
563
|
}
|
564
|
|
565
|
private Taxon getAcceptedTaxon(TaxonName name) {
|
566
|
Taxon taxon = null;
|
567
|
//prefer accepted taxon
|
568
|
if (name.getTaxa() != null && !name.getTaxa().isEmpty()){
|
569
|
taxon = name.getTaxa().iterator().next();
|
570
|
taxon = getAccTaxonForTaxonSynonym(taxon);
|
571
|
//else take synonym
|
572
|
}else if (name.getTaxonBases() != null && !name.getTaxonBases().isEmpty()){
|
573
|
TaxonBase<?> taxonBase = name.getTaxonBases().iterator().next();
|
574
|
if (taxonBase instanceof Synonym) {
|
575
|
Synonym syn = (Synonym)taxonBase;
|
576
|
taxon = syn.getAcceptedTaxon();
|
577
|
}
|
578
|
}
|
579
|
return taxon;
|
580
|
}
|
581
|
|
582
|
private Taxon getAccTaxonForTaxonSynonym(Taxon taxon) {
|
583
|
if (!taxon.getRelationsFromThisTaxon().isEmpty()){
|
584
|
for (TaxonRelationship rel: taxon.getRelationsFromThisTaxon()){
|
585
|
UUID uuidType = rel.getType().getUuid();
|
586
|
if (uuidType.equals(TaxonRelationshipType.uuidSynonymOfTaxonRelationship)
|
587
|
|| uuidType.equals(TaxonRelationshipType.uuidHeterotypicSynonymTaxonRelationship)
|
588
|
|| uuidType.equals(TaxonRelationshipType.uuidHomotypicSynonymTaxonRelationship)){
|
589
|
taxon = rel.getToTaxon();
|
590
|
}
|
591
|
}
|
592
|
}
|
593
|
return taxon;
|
594
|
}
|
595
|
|
596
|
/**
|
597
|
* Filters out the ERMS taxon synonyms
|
598
|
*/
|
599
|
private Set<Taxon> getReallyAcceptedTaxa(Set<Taxon> taxa) {
|
600
|
Set<Taxon> result = new HashSet<>();
|
601
|
for (Taxon taxon : taxa){
|
602
|
Taxon accTaxon = getAccTaxonForTaxonSynonym(taxon);
|
603
|
if(taxon.equals(accTaxon)) {
|
604
|
result.add(taxon);
|
605
|
}
|
606
|
}
|
607
|
return result;
|
608
|
}
|
609
|
|
610
|
private CharSequence Nz(String str) {
|
611
|
return CdmUtils.Nz(str);
|
612
|
}
|
613
|
|
614
|
public static void main(String[] args) {
|
615
|
PesiFindIdenticalNamesActivator activator = new PesiFindIdenticalNamesActivator();
|
616
|
activator.invoke(pesiSource);
|
617
|
System.exit(0);
|
618
|
}
|
619
|
}
|