1
|
/**
|
2
|
* Copyright (C) 2017 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.model.name;
|
10
|
|
11
|
import java.io.Serializable;
|
12
|
import java.util.Comparator;
|
13
|
import java.util.HashSet;
|
14
|
import java.util.Set;
|
15
|
|
16
|
import org.apache.log4j.Logger;
|
17
|
|
18
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
19
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
20
|
import eu.etaxonomy.cdm.model.taxon.TaxonComparator;
|
21
|
|
22
|
/**
|
23
|
* This class orders synonyms of a homotypic group,
|
24
|
* first by
|
25
|
* <ul>
|
26
|
* <li>Basionym groups (the basionym and all names derived from this basionym)
|
27
|
* should be kept together in a subgroup</li>
|
28
|
* <li>The order of the subgroups is defined by the ordering of their
|
29
|
* basionyms (according to the following ordering)</li>
|
30
|
* <li>Whether or not a name is illegitimate plays a role in the ordering</li>
|
31
|
* <li>Names with publication year should always come first</li>
|
32
|
* <li>Names with no publication year are sorted by rank</li>
|
33
|
* <li>Names with no publication year and equal rank are sorted alphabetically</li>
|
34
|
* <li>If 2 names have a replaced synonym relationship the replaced synonym comes first,
|
35
|
* the replacement name comes later as this reflects the order of publication</li>
|
36
|
* </ul>
|
37
|
*
|
38
|
* Details on ordering are explained at http://dev.e-taxonomy.eu/trac/ticket/3338<BR>
|
39
|
*
|
40
|
* @author k.luther
|
41
|
* @date 20.03.2017
|
42
|
*
|
43
|
*/
|
44
|
public class HomotypicalGroupNameComparator implements Comparator<TaxonNameBase>, Serializable{
|
45
|
|
46
|
private static final Logger logger = Logger.getLogger(HomotypicalGroupNameComparator.class);
|
47
|
|
48
|
|
49
|
private final TaxonNameBase<?,?> firstNameInGroup;
|
50
|
private boolean includeRanks = false;
|
51
|
|
52
|
/**
 * Creates a comparator for the names of a homotypical group.
 *
 * @param firstNameInGroup the name which is to be sorted to the beginning of its
 *        homotypical group; the value is stored as is, no check is performed here
 * @param includeRanks if <code>true</code> the rank of the names is used as an
 *        additional sort criterion when 2 names are compared by date, rank and name
 */
public HomotypicalGroupNameComparator(@SuppressWarnings("rawtypes") TaxonNameBase firstNameInGroup, boolean includeRanks) {
    //the implicit call of the super constructor is sufficient
    this.includeRanks = includeRanks;
    this.firstNameInGroup = firstNameInGroup;
}
|
60
|
|
61
|
|
62
|
/**
|
63
|
*
|
64
|
* @see TaxonComparator#compare(TaxonBase, TaxonBase)
|
65
|
* @see java.lang.String#compareTo(String)
|
66
|
* @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
|
67
|
*/
|
68
|
@Override
|
69
|
public int compare(
|
70
|
@SuppressWarnings("rawtypes") TaxonNameBase taxonNameBase1,
|
71
|
@SuppressWarnings("rawtypes") TaxonNameBase taxonNameBase2) {
|
72
|
|
73
|
|
74
|
if (logger.isDebugEnabled()){logger.debug(taxonNameBase1.getTitleCache() +" : "+ taxonNameBase2.getTitleCache());}
|
75
|
|
76
|
|
77
|
int compareStatus = compareStatus(taxonNameBase1, taxonNameBase2);
|
78
|
if (compareStatus != 0){
|
79
|
return compareStatus;
|
80
|
}
|
81
|
|
82
|
//not same homotypical group -
|
83
|
//NOTE: this comparator should usually not be used
|
84
|
// for comparing names of different homotypical groups.
|
85
|
// The following is only to have a defined compare behavior
|
86
|
// which follows the contract of Comparator#compare.
|
87
|
if (taxonNameBase1 == null ||
|
88
|
taxonNameBase2 == null ||
|
89
|
! taxonNameBase1.getHomotypicalGroup().equals(taxonNameBase2.getHomotypicalGroup())){
|
90
|
|
91
|
String compareString1 =
|
92
|
taxonNameBase1.getHomotypicalGroup().getUuid().toString() ;
|
93
|
String compareString2 =
|
94
|
taxonNameBase2.getHomotypicalGroup().getUuid().toString() ;
|
95
|
int result = compareString1.compareTo(compareString2);
|
96
|
return result;
|
97
|
}
|
98
|
|
99
|
//same homotypical group ...
|
100
|
//one taxon is first in group
|
101
|
if (taxonNameBase1.equals(firstNameInGroup)){
|
102
|
return -1;
|
103
|
}else if (taxonNameBase2.equals(firstNameInGroup)){
|
104
|
return 1;
|
105
|
}
|
106
|
|
107
|
|
108
|
|
109
|
TaxonNameBase<?,?> basionym1 = getPreferredInBasionymGroup(taxonNameBase1);
|
110
|
TaxonNameBase<?,?> basionym2 = getPreferredInBasionymGroup(taxonNameBase2);
|
111
|
|
112
|
int compareResult;
|
113
|
if (basionym1.equals(basionym2)){
|
114
|
//both names belong to same basionym sub-group
|
115
|
compareResult = handleSameBasionym(basionym1, taxonNameBase1, taxonNameBase2);
|
116
|
}else{
|
117
|
compareResult = compareBasionyms(basionym1, basionym2);
|
118
|
}
|
119
|
|
120
|
return compareResult;
|
121
|
}
|
122
|
|
123
|
|
124
|
/**
|
125
|
* Compare 2 names which have the same basionym.
|
126
|
* The names must not be equal to each other but may be equal
|
127
|
* to the basionym.
|
128
|
* @param basionym the basionym
|
129
|
* @param name1 first name to compare
|
130
|
* @param name2 second name to compare
|
131
|
* @return compare value according to the {@link Comparator#compare(Object, Object)} contract.
|
132
|
*/
|
133
|
private int handleSameBasionym(TaxonNameBase<?, ?> basionym,
|
134
|
TaxonNameBase<?, ?> name1,
|
135
|
TaxonNameBase<?, ?> name2) {
|
136
|
|
137
|
if (basionym.equals(name1)){
|
138
|
return -1;
|
139
|
}else if (basionym.equals(name2)){
|
140
|
return 1;
|
141
|
}else{
|
142
|
this.compare(name1, name2, false);
|
143
|
}
|
144
|
return 0;
|
145
|
}
|
146
|
|
147
|
/**
|
148
|
* @param basionym1
|
149
|
* @param basionym2
|
150
|
* @return
|
151
|
*/
|
152
|
private int compareBasionyms(TaxonNameBase<?,?> basionym1Orig, TaxonNameBase<?,?> basionym2Orig) {
|
153
|
//one taxon is first in group
|
154
|
TaxonNameBase<?,?> basionym1 = getFirstNameInGroup(basionym1Orig);
|
155
|
TaxonNameBase<?,?> basionym2 = getFirstNameInGroup(basionym2Orig);
|
156
|
|
157
|
//handle accepted taxon case
|
158
|
if (basionym1.equals(firstNameInGroup)){
|
159
|
return -1;
|
160
|
}else if (basionym2.equals(firstNameInGroup)){
|
161
|
return 1;
|
162
|
}
|
163
|
|
164
|
//handle replaced synonyms
|
165
|
boolean basio2IsReplacedSynForBasio1 = getReplacedSynonymClosure(basionym1).contains(basionym2);
|
166
|
boolean basio1IsReplacedSynForBasio2 = getReplacedSynonymClosure(basionym2).contains(basionym1);
|
167
|
|
168
|
if (basio2IsReplacedSynForBasio1 && !basio1IsReplacedSynForBasio2){
|
169
|
return 1;
|
170
|
}else if (basio1IsReplacedSynForBasio2 && !basio2IsReplacedSynForBasio1){
|
171
|
return -1;
|
172
|
}
|
173
|
|
174
|
//compare by date, nom. illeg., rank and alphabetically
|
175
|
return this.compare(basionym1, basionym2, false);
|
176
|
|
177
|
}
|
178
|
|
179
|
/**
|
180
|
* @param basionym
|
181
|
* @return
|
182
|
*/
|
183
|
private TaxonNameBase<?, ?> getFirstNameInGroup(TaxonNameBase<?, ?> basionym) {
|
184
|
for (NameRelationship nameRel : basionym.getRelationsFromThisName()){
|
185
|
if (nameRel.getType() != null && nameRel.getType().equals(NameRelationshipType.BASIONYM())){
|
186
|
if (nameRel.getToName().equals(firstNameInGroup)){
|
187
|
return firstNameInGroup;
|
188
|
}
|
189
|
}
|
190
|
}
|
191
|
return basionym;
|
192
|
}
|
193
|
|
194
|
/**
|
195
|
* @param basionym1
|
196
|
* @return
|
197
|
*/
|
198
|
@SuppressWarnings("rawtypes")
|
199
|
private Set<TaxonNameBase> getReplacedSynonymClosure(TaxonNameBase<?, ?> name) {
|
200
|
Set<TaxonNameBase> set = name.getReplacedSynonyms();
|
201
|
if (set.isEmpty()){
|
202
|
return set;
|
203
|
}
|
204
|
Set<TaxonNameBase> result = new HashSet<TaxonNameBase>();
|
205
|
for (TaxonNameBase<?,?> replSyn : set){
|
206
|
boolean notYetContained = result.add(replSyn);
|
207
|
if (notYetContained){
|
208
|
result.addAll(replSyn.getReplacedSynonyms());
|
209
|
}
|
210
|
}
|
211
|
return result;
|
212
|
}
|
213
|
|
214
|
/**
|
215
|
* @param name
|
216
|
* @return
|
217
|
*/
|
218
|
private TaxonNameBase<?,?> getPreferredInBasionymGroup(TaxonNameBase<?,?> name) {
|
219
|
Set<TaxonNameBase<?,?>> candidates = new HashSet<TaxonNameBase<?,?>>();
|
220
|
//get all final basionyms, except for those being part of a basionym circle
|
221
|
for (TaxonNameBase<?,?> candidate : name.getBasionyms()){
|
222
|
if (candidate != null
|
223
|
&& candidate.getHomotypicalGroup().equals(name.getHomotypicalGroup())
|
224
|
&& !hasBasionymCircle(candidate, null)){
|
225
|
candidate = getPreferredInBasionymGroup(candidate);
|
226
|
candidates.add(candidate);
|
227
|
}
|
228
|
}
|
229
|
|
230
|
if (candidates.isEmpty()){
|
231
|
return name;
|
232
|
}else if (candidates.size() == 1){
|
233
|
return candidates.iterator().next();
|
234
|
}else{
|
235
|
TaxonNameBase<?,?> result = candidates.iterator().next();
|
236
|
candidates.remove(result);
|
237
|
for (TaxonNameBase<?,?> candidate : candidates){
|
238
|
if (this.compare(result, candidate) > 0){
|
239
|
result = candidate;
|
240
|
}
|
241
|
}
|
242
|
return result;
|
243
|
}
|
244
|
}
|
245
|
|
246
|
/**
|
247
|
* @param candidate
|
248
|
* @return
|
249
|
*/
|
250
|
private boolean hasBasionymCircle(TaxonNameBase<?, ?> name, Set<TaxonNameBase<?,?>> existing) {
|
251
|
if (existing == null){
|
252
|
existing = new HashSet<TaxonNameBase<?,?>>();
|
253
|
}
|
254
|
if (existing.contains(name)){
|
255
|
return true;
|
256
|
}else{
|
257
|
Set<TaxonNameBase> basionyms = name.getBasionyms();
|
258
|
if (basionyms.isEmpty()){
|
259
|
return false;
|
260
|
}
|
261
|
existing.add(name);
|
262
|
for (TaxonNameBase basionym : basionyms){
|
263
|
if (hasBasionymCircle(basionym, existing)){
|
264
|
return true;
|
265
|
}
|
266
|
}
|
267
|
return false;
|
268
|
}
|
269
|
}
|
270
|
|
271
|
|
272
|
// /**
|
273
|
// * @param homotypicalGroup
|
274
|
// * @return
|
275
|
// */
|
276
|
// private TaxonBase<?> getFirstInHomotypicalGroup(HomotypicalGroup homotypicalGroup, Collection<TaxonBase<?>> existing) {
|
277
|
// List<TaxonBase<?>> candidates = new ArrayList<TaxonBase<?>>();
|
278
|
// for (TaxonBase<?> candidate : existing){
|
279
|
// if (homotypicalGroup.getTypifiedNames().contains(candidate.getName())){
|
280
|
// candidates.add(candidate);
|
281
|
// }
|
282
|
// }
|
283
|
// Collections.sort(candidates, this);
|
284
|
// return candidates.isEmpty() ? null : candidates.get(0);
|
285
|
// }
|
286
|
|
287
|
/**
|
288
|
* @param taxonNameBase
|
289
|
* @param taxonNameBase2
|
290
|
* @param statusCompareWeight
|
291
|
* @return
|
292
|
*/
|
293
|
protected int compareStatus(TaxonNameBase<?,?> taxonNameBase, TaxonNameBase<?,?> taxonNameBase2) {
|
294
|
int statusCompareWeight = 0;
|
295
|
statusCompareWeight += computeStatusCompareWeight(taxonNameBase);
|
296
|
statusCompareWeight -= computeStatusCompareWeight(taxonNameBase2);
|
297
|
return statusCompareWeight;
|
298
|
}
|
299
|
|
300
|
/**
|
301
|
* @param taxonBase1
|
302
|
* @param statusCompareWeight
|
303
|
* @return
|
304
|
*/
|
305
|
private int computeStatusCompareWeight(TaxonNameBase<?,?> taxonNameBase) {
|
306
|
int result = 0;
|
307
|
if (taxonNameBase == null || taxonNameBase.getStatus() == null){
|
308
|
return 0;
|
309
|
}
|
310
|
Set<NomenclaturalStatus> status1 = taxonNameBase.getStatus();
|
311
|
for (NomenclaturalStatus nomStatus1 : status1){
|
312
|
NomenclaturalStatusType type = nomStatus1.getType();
|
313
|
if (type != null && type.isInvalidType()){
|
314
|
if(type.equals(NomenclaturalStatusType.PROVISIONAL())){
|
315
|
result += 1;
|
316
|
}else if (type.equals(NomenclaturalStatusType.INVALID())){
|
317
|
result += 2;
|
318
|
}else if(type.equals(NomenclaturalStatusType.COMBINATION_INVALID())){
|
319
|
result += 2;
|
320
|
}else if (type.equals(NomenclaturalStatusType.OPUS_UTIQUE_OPPR())){
|
321
|
result += 2;
|
322
|
}else if(type.equals(NomenclaturalStatusType.NUDUM())){
|
323
|
result += 3;
|
324
|
}
|
325
|
result += 1;
|
326
|
}
|
327
|
}
|
328
|
return result;
|
329
|
}
|
330
|
/**
|
331
|
*
|
332
|
* @param name1
|
333
|
* @param name2
|
334
|
* @param includeNomIlleg if true and if both names have no date or same date, the only
|
335
|
* name having nom. illeg. state is handled as if the name was published later than the name
|
336
|
* without status nom. illeg.
|
337
|
* @return
|
338
|
*/
|
339
|
protected int compare(TaxonNameBase<?,?> name1, TaxonNameBase<?,?> name2, boolean includeNomIlleg) {
|
340
|
int result;
|
341
|
|
342
|
//dates
|
343
|
Integer intDate1 = getIntegerDate(name1);
|
344
|
Integer intDate2 = getIntegerDate(name2);
|
345
|
|
346
|
if (intDate1 == null && intDate2 == null){
|
347
|
result = 0;
|
348
|
}else if (intDate1 == null){
|
349
|
return 1;
|
350
|
}else if (intDate2 == null){
|
351
|
return -1;
|
352
|
}else{
|
353
|
result = intDate1.compareTo(intDate2);
|
354
|
}
|
355
|
|
356
|
//nom. illeg.
|
357
|
if (result == 0 && includeNomIlleg){
|
358
|
result = compareNomIlleg(name1, name2);
|
359
|
if (result != 0){
|
360
|
return result;
|
361
|
}
|
362
|
}
|
363
|
|
364
|
if (result == 0 && includeRanks){
|
365
|
Rank rank1 = name1 == null? null : name1.getRank();
|
366
|
Rank rank2 = name2 == null? null : name2.getRank();
|
367
|
|
368
|
if (rank1 == null && rank2 == null){
|
369
|
result = 0;
|
370
|
}else if (rank1 == null){
|
371
|
return 1;
|
372
|
}else if (rank2 == null){
|
373
|
return -1;
|
374
|
}else{
|
375
|
//for some strange reason compareTo for ranks returns 1 if rank2 is lower. So we add minus (-)
|
376
|
result = - rank1.compareTo(rank2);
|
377
|
}
|
378
|
}
|
379
|
|
380
|
if (result == 0 && name1 != null && name2 != null){
|
381
|
result = name1.compareToName(name2);
|
382
|
if (result != 0){
|
383
|
return result;
|
384
|
}
|
385
|
}
|
386
|
return result;
|
387
|
}
|
388
|
|
389
|
private Integer getIntegerDate(TaxonNameBase<?,?> name){
|
390
|
Integer result;
|
391
|
|
392
|
if (name == null){
|
393
|
result = null;
|
394
|
}else{
|
395
|
if (name.isZoological()){
|
396
|
result = name.getPublicationYear();
|
397
|
}else{
|
398
|
Reference ref = (Reference) name.getNomenclaturalReference();
|
399
|
if (ref == null){
|
400
|
result = null;
|
401
|
}else{
|
402
|
if (ref.getDatePublished() == null){
|
403
|
Reference inRef = ref.getInReference();
|
404
|
if (inRef == null){
|
405
|
result = null;
|
406
|
}else{
|
407
|
if (inRef.getDatePublished() == null){
|
408
|
result = null;
|
409
|
}else{
|
410
|
result = ref.getInReference().getDatePublished().getStartYear();
|
411
|
}
|
412
|
}
|
413
|
}else{
|
414
|
result = ref.getDatePublished().getStartYear();
|
415
|
}
|
416
|
}
|
417
|
}
|
418
|
}
|
419
|
|
420
|
return result;
|
421
|
}
|
422
|
|
423
|
protected int compareNomIlleg(TaxonNameBase<?,?> taxonNameBase1, TaxonNameBase<?,?> taxonNameBase2) {
|
424
|
int isNomIlleg1 = isNomIlleg(taxonNameBase1);
|
425
|
int isNomIlleg2 = isNomIlleg(taxonNameBase2);
|
426
|
return isNomIlleg1 - isNomIlleg2;
|
427
|
}
|
428
|
|
429
|
private int isNomIlleg(TaxonNameBase<?,?> taxonNameBase) {
|
430
|
if (taxonNameBase == null || taxonNameBase.getStatus() == null){
|
431
|
return 0;
|
432
|
}
|
433
|
Set<NomenclaturalStatus> status = taxonNameBase.getStatus();
|
434
|
for (NomenclaturalStatus nomStatus : status){
|
435
|
if (nomStatus.getType() != null){
|
436
|
if (nomStatus.getType().equals(NomenclaturalStatusType.ILLEGITIMATE())){
|
437
|
return 1;
|
438
|
}
|
439
|
}
|
440
|
}
|
441
|
return 0;
|
442
|
}
|
443
|
|
444
|
}
|