1
|
/**
|
2
|
* Copyright (C) 2017 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.common.utils;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.HashSet;
|
14
|
import java.util.List;
|
15
|
import java.util.Map;
|
16
|
import java.util.Optional;
|
17
|
import java.util.Set;
|
18
|
import java.util.UUID;
|
19
|
import java.util.function.Predicate;
|
20
|
|
21
|
import org.apache.logging.log4j.LogManager;import org.apache.logging.log4j.Logger;
|
22
|
|
23
|
import eu.etaxonomy.cdm.api.application.ICdmRepository;
|
24
|
import eu.etaxonomy.cdm.api.service.IService;
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.common.ImportResult;
|
27
|
import eu.etaxonomy.cdm.io.common.ImportStateBase;
|
28
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
29
|
import eu.etaxonomy.cdm.model.agent.Institution;
|
30
|
import eu.etaxonomy.cdm.model.agent.Person;
|
31
|
import eu.etaxonomy.cdm.model.agent.Team;
|
32
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
33
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
34
|
import eu.etaxonomy.cdm.model.common.ICdmBase;
|
35
|
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
|
36
|
import eu.etaxonomy.cdm.model.media.Rights;
|
37
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
38
|
import eu.etaxonomy.cdm.model.name.HybridRelationship;
|
39
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
40
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
41
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
42
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
43
|
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
|
44
|
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
|
45
|
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual;
|
46
|
import eu.etaxonomy.cdm.strategy.match.IMatchable;
|
47
|
import eu.etaxonomy.cdm.strategy.match.MatchException;
|
48
|
import eu.etaxonomy.cdm.strategy.match.MatchMode;
|
49
|
import eu.etaxonomy.cdm.strategy.match.MatchStrategyFactory;
|
50
|
|
51
|
/**
 * Helper class for deduplicating authors, references, names, etc.
 * during import.
 * <p>
 * Note 2021: This class was originally used as a fast deduplication tool for
 * command line imports into empty databases. It is currently being transformed
 * into a deduplication tool that can also be used during application based imports.
 *
 * @author a.mueller
 * @since 11.02.2017
 * @date 27.01.2022
 */
|
67
|
public class ImportDeduplicationHelper {
|
68
|
|
69
|
private static final Logger logger = LogManager.getLogger(ImportDeduplicationHelper.class);
|
70
|
|
71
|
private ICdmRepository repository;
|
72
|
|
73
|
//for possible future use
|
74
|
@SuppressWarnings("unused")
|
75
|
private ImportStateBase<?,?> state;
|
76
|
|
77
|
public static final int NEVER_USE_MAP = 0;
|
78
|
public static final int ALWAYS_USE_MAP = -1;
|
79
|
//should deduplication use maps indexing the full database content? If yes, what is the maximum number of records for this.
|
80
|
//If more records exist deduplication is done on the fly.
|
81
|
//0 = never use map
|
82
|
//-1 = always use map
|
83
|
private int maxCountFullLoad = ALWAYS_USE_MAP;
|
84
|
public int getMaxCountFullLoad() {
|
85
|
return maxCountFullLoad;
|
86
|
}
|
87
|
public void setMaxCountFullLoad(int maxCountFullLoad) {
|
88
|
this.maxCountFullLoad = maxCountFullLoad;
|
89
|
}
|
90
|
|
91
|
private enum Status{
|
92
|
NOT_INIT,
|
93
|
USE_MAP,
|
94
|
USE_REPO;
|
95
|
}
|
96
|
|
97
|
private class DedupInfo<S extends IMatchable>{
|
98
|
Class<S> clazz;
|
99
|
IMatchStrategyEqual defaultMatcher;
|
100
|
IMatchStrategy parsedMatcher;
|
101
|
Map<String, Set<S>> map = new HashMap<>();
|
102
|
Status status = Status.NOT_INIT;
|
103
|
|
104
|
@SuppressWarnings("unchecked")
|
105
|
private DedupInfo(Class<S> clazz, DedupMap dedupMap){
|
106
|
this.clazz = clazz;
|
107
|
if (IMatchable.class.isAssignableFrom(clazz)) {
|
108
|
defaultMatcher = DefaultMatchStrategy.NewInstance(clazz);
|
109
|
if (Reference.class.isAssignableFrom(clazz)) {
|
110
|
parsedMatcher = MatchStrategyFactory.NewParsedReferenceInstance();
|
111
|
}else if (TeamOrPersonBase.class.isAssignableFrom(clazz)) {
|
112
|
parsedMatcher = MatchStrategyFactory.NewParsedTeamOrPersonInstance();
|
113
|
// }else if (TaxonName.class.isAssignableFrom(clazz)){
|
114
|
// parsedMatcher = MatchStrategyFactory.NewParsedTaxonNameInstance();
|
115
|
}
|
116
|
}
|
117
|
dedupMap.put(clazz, this);
|
118
|
}
|
119
|
@Override
|
120
|
public String toString() {
|
121
|
return clazz.getSimpleName() + ":" + status.name()+":mapsize=" + map.size()+":"+ (defaultMatcher == null?"without":"with") + " defaultMatcher" + (parsedMatcher == null? "" : " and with parsedMatcher");
|
122
|
}
|
123
|
}
|
124
|
|
125
|
private class DedupMap<T extends IMatchable> extends HashMap<Class<T>, DedupInfo<T>>{
|
126
|
private static final long serialVersionUID = 3757206594833330646L;
|
127
|
}
|
128
|
private DedupMap<? extends IdentifiableEntity> dedupMap = new DedupMap<>();
|
129
|
|
130
|
private DedupInfo<Reference> referenceDedupInfo = new DedupInfo<>(Reference.class, dedupMap);
|
131
|
private DedupInfo<Person> personDedupInfo = new DedupInfo<>(Person.class, dedupMap);
|
132
|
private DedupInfo<Team> teamDedupInfo = new DedupInfo<>(Team.class, dedupMap);
|
133
|
private DedupInfo<TaxonName> nameDedupInfo = new DedupInfo<>(TaxonName.class, dedupMap);
|
134
|
|
135
|
|
136
|
@SuppressWarnings("unused")
|
137
|
private Status institutionStatus = Status.NOT_INIT;
|
138
|
private Status copyrightStatus = Status.NOT_INIT;
|
139
|
private Status collectionStatus = Status.NOT_INIT;
|
140
|
|
141
|
private Map<String, Set<Institution>> institutionMap = new HashMap<>();
|
142
|
//using titleCache
|
143
|
private Map<String, Set<Rights>> copyrightMap = new HashMap<>();
|
144
|
private Map<String, Set<Collection>> collectionMap = new HashMap<>();
|
145
|
|
146
|
/**
|
147
|
* Clears all internal maps.
|
148
|
*/
|
149
|
public void reset() {
|
150
|
dedupMap.values().forEach(di->{di.map.clear();di.status=Status.NOT_INIT;});
|
151
|
institutionMap.clear();
|
152
|
copyrightMap.clear();
|
153
|
collectionMap.clear();
|
154
|
}
|
155
|
|
156
|
// private IMatchStrategy collectionMatcher = DefaultMatchStrategy.NewInstance(Collection.class);
|
157
|
|
158
|
// ************************** FACTORY *******************************/
|
159
|
|
160
|
public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper NewInstance(ICdmRepository repository, STATE state){
|
161
|
return new ImportDeduplicationHelper(repository, state);
|
162
|
}
|
163
|
|
164
|
// ************************ CONSTRUCTOR *****************************/
|
165
|
|
166
|
/**
 * Stores repository and state (both may be <code>null</code>, which is only
 * logged as a warning) and adapts the default matchers for references and teams.
 */
private ImportDeduplicationHelper(ICdmRepository repository, ImportStateBase<?,?> state) {
    if (repository == null){
        logger.warn("Repository is null. Deduplication does not work against database.");
    }
    if (state == null){
        logger.warn("State is null. Deduplication works without state.");
    }
    this.repository = repository;
    this.state = state;

    //the dedup infos registered themselves in dedupMap under their class,
    //so they can be accessed directly via their fields
    try {
        referenceDedupInfo.defaultMatcher.setMatchMode("title", MatchMode.EQUAL);
        teamDedupInfo.defaultMatcher.setMatchMode("nomenclaturalTitleCache", MatchMode.EQUAL);
    } catch (MatchException e) {
        //should not happen
        throw new RuntimeException(e);
    }
}
|
182
|
|
183
|
public void restartSession(){
|
184
|
restartSession(repository, null);
|
185
|
}
|
186
|
|
187
|
/**
|
188
|
* Clears all internal maps and loads them with same data as before but in current session.
|
189
|
*/
|
190
|
public void restartSession(ICdmRepository repository, ImportResult importResult){
|
191
|
if (repository == null){
|
192
|
return;
|
193
|
}
|
194
|
referenceDedupInfo.map = refreshSetMap(referenceDedupInfo.map, (IService)repository.getReferenceService(), importResult);
|
195
|
personDedupInfo.map = refreshSetMap(personDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
196
|
teamDedupInfo.map = refreshSetMap(teamDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
197
|
institutionMap = refreshSetMap(institutionMap, (IService)repository.getAgentService(), importResult);
|
198
|
|
199
|
nameDedupInfo.map = refreshSetMap(nameDedupInfo.map, (IService)repository.getNameService(), importResult);
|
200
|
collectionMap = refreshSetMap(collectionMap, (IService)repository.getCollectionService(), importResult);
|
201
|
copyrightMap = refreshSetMap(copyrightMap, (IService)repository.getRightsService(), importResult);
|
202
|
}
|
203
|
|
204
|
//maybe this was used for Institution before
|
205
|
private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap,
|
206
|
IService<T> service, ImportResult importResult) {
|
207
|
|
208
|
Map<String, T> newMap = new HashMap<>();
|
209
|
for (String key : oldMap.keySet()){
|
210
|
T old = oldMap.get(key);
|
211
|
if (old!= null){
|
212
|
T cdmBase = service.find(old.getUuid());
|
213
|
if (cdmBase == null){
|
214
|
String message = "No cdm object was found for uuid " + old.getUuid() + " of class " + old.getClass().getSimpleName();
|
215
|
importResult.addWarning(message);
|
216
|
}else{
|
217
|
newMap.put(key, CdmBase.deproxy(cdmBase));
|
218
|
}
|
219
|
}else{
|
220
|
String message = "Value for key " + key + " was null in deduplication map";
|
221
|
importResult.addWarning(message);
|
222
|
}
|
223
|
}
|
224
|
return newMap;
|
225
|
}
|
226
|
|
227
|
private <T extends ICdmBase> Map<String, Set<T>> refreshSetMap(Map<String, Set<T>> oldMap,
|
228
|
IService<T> service, ImportResult importResult) {
|
229
|
|
230
|
Map<String, Set<T>> newMap = new HashMap<>();
|
231
|
//create UUID set
|
232
|
Set<UUID> uuidSet = new HashSet<>();
|
233
|
for (String key : oldMap.keySet()){
|
234
|
Set<T> oldSet = oldMap.get(key);
|
235
|
for (T item : oldSet){
|
236
|
UUID uuid = item.getUuid();
|
237
|
uuidSet.add(uuid);
|
238
|
}
|
239
|
}
|
240
|
//create uuid-item map
|
241
|
Map<UUID, T> itemMap = new HashMap<>();
|
242
|
List<T> list = service.find(uuidSet);
|
243
|
for (T item : list){
|
244
|
itemMap.put(item.getUuid(), item);
|
245
|
}
|
246
|
//refresh
|
247
|
for (String key : oldMap.keySet()){
|
248
|
Set<T> oldSet = oldMap.get(key);
|
249
|
Set<T> newSet = new HashSet<>();
|
250
|
if (oldSet != null){
|
251
|
newMap.put(key, newSet);
|
252
|
for (T item : oldSet){
|
253
|
T cdmBase = CdmBase.deproxy(itemMap.get(item.getUuid()));
|
254
|
if (cdmBase == null){
|
255
|
String message = "No cdm object was found for uuid " + item.getUuid() + " of class " + item.getClass().getSimpleName();
|
256
|
importResult.addWarning(message);
|
257
|
}else{
|
258
|
newSet.add(cdmBase);
|
259
|
}
|
260
|
}
|
261
|
}else{
|
262
|
String message = "Value for key " + key + " was null in deduplication map";
|
263
|
importResult.addWarning(message);
|
264
|
}
|
265
|
}
|
266
|
return newMap;
|
267
|
}
|
268
|
|
269
|
//************************ PUTTER / GETTER *****************************/
|
270
|
|
271
|
//ENTITY
|
272
|
private <S extends IdentifiableEntity<?>> void putEntity(String title, S entity, Map<String,Set<S>> map){
|
273
|
Set<S> entitySet = map.get(title);
|
274
|
if (entitySet == null){
|
275
|
entitySet = new HashSet<>();
|
276
|
map.put(title, entitySet);
|
277
|
}
|
278
|
entitySet.add(CdmBase.deproxy(entity));
|
279
|
}
|
280
|
|
281
|
private <S extends IMatchable> Set<S> getEntityByTitle(String title, DedupInfo<S> dedupInfo){
|
282
|
return dedupInfo.map.get(title);
|
283
|
}
|
284
|
|
285
|
private <S extends IMatchable> Optional<S> getMatchingEntity(S entityOrig, DedupInfo<S> dedupInfo, boolean parsed){
|
286
|
S entity = CdmBase.deproxy(entityOrig);
|
287
|
//choose matcher depending on the type of matching required. If matching of a parsed entity is required
|
288
|
// try to use the parsed matcher (if it exists)
|
289
|
IMatchStrategy matcher = parsed && dedupInfo.parsedMatcher != null ? dedupInfo.parsedMatcher : dedupInfo.defaultMatcher;
|
290
|
Predicate<S> matchFilter = persistedEntity ->{
|
291
|
try {
|
292
|
return matcher.invoke((IMatchable)entity, (IMatchable)persistedEntity).isSuccessful();
|
293
|
} catch (MatchException e) {
|
294
|
throw new RuntimeException(e);
|
295
|
}
|
296
|
};
|
297
|
//TODO casting
|
298
|
Optional<S> result = Optional.ofNullable(getEntityByTitle(((IdentifiableEntity<?>)entity).getTitleCache(), dedupInfo))
|
299
|
.orElse(new HashSet<>())
|
300
|
.stream()
|
301
|
.filter(matchFilter)
|
302
|
.findAny();
|
303
|
if (result.isPresent() || dedupInfo.status == Status.USE_MAP || repository == null){
|
304
|
return result;
|
305
|
}else {
|
306
|
try {
|
307
|
return (Optional)repository.getCommonService().findMatching((IMatchable)entity, matcher).stream().findFirst();
|
308
|
} catch (MatchException e) {
|
309
|
throw new RuntimeException(e);
|
310
|
}
|
311
|
}
|
312
|
}
|
313
|
|
314
|
// AGENTS
|
315
|
private void putAgentBase(String title, AgentBase<?> agent){
|
316
|
if (agent.isInstanceOf(Person.class) ){
|
317
|
putEntity(title, CdmBase.deproxy(agent, Person.class), personDedupInfo.map);
|
318
|
}else if (agent.isInstanceOf(Team.class)){
|
319
|
putEntity(title, CdmBase.deproxy(agent, Team.class), teamDedupInfo.map);
|
320
|
}else{
|
321
|
putEntity(title, CdmBase.deproxy(agent, Institution.class), institutionMap);
|
322
|
}
|
323
|
}
|
324
|
|
325
|
private <T extends TeamOrPersonBase<?>> T getTeamOrPerson(T agent, boolean parsed){
|
326
|
T result = agent;
|
327
|
if (agent.isInstanceOf(Person.class)){
|
328
|
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Person.class), personDedupInfo, parsed).orElse(null) ; // personMap.get(title);
|
329
|
}else if (agent.isInstanceOf(Team.class)) {
|
330
|
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Team.class), teamDedupInfo, parsed).orElse(null); // teamMap.get(title);
|
331
|
}
|
332
|
return result;
|
333
|
}
|
334
|
|
335
|
//COLLECTIONS
|
336
|
private Set<Collection> getCollections(String title){
|
337
|
return collectionMap.get(title);
|
338
|
}
|
339
|
|
340
|
private Optional<Collection> getMatchingCollections(Collection existing){
|
341
|
Predicate<Collection> matchFilter = collection ->{
|
342
|
// try {
|
343
|
//TODO right Collection matching
|
344
|
if (CdmUtils.nullSafeEqual(collection.getName(), existing.getName())
|
345
|
&& CdmUtils.nullSafeEqual(collection.getCode(), existing.getCode())){
|
346
|
return true;
|
347
|
}else{
|
348
|
return false;
|
349
|
}
|
350
|
// return collectionMatcher.invoke(collection, existing);
|
351
|
// } catch (MatchException e) {
|
352
|
// throw new RuntimeException(e);
|
353
|
// }
|
354
|
};
|
355
|
return Optional.ofNullable(getCollections(existing.getTitleCache()))
|
356
|
.orElse(new HashSet<>())
|
357
|
.stream()
|
358
|
.filter(matchFilter)
|
359
|
.findAny();
|
360
|
}
|
361
|
|
362
|
// **************************** METHODS *****************************/
|
363
|
|
364
|
/**
|
365
|
* This method replaces name authors, nomenclatural reference and
|
366
|
* nomenclatural reference author by existing authors and references
|
367
|
* if matching authors or references exist. If not, the given authors
|
368
|
* and references are added to the map of existing entities.
|
369
|
*
|
370
|
* @param state the import state
|
371
|
* @param name the name with authors and references to replace
|
372
|
*/
|
373
|
public void replaceAuthorNamesAndNomRef(INonViralName name) {
|
374
|
|
375
|
boolean parsed = true;
|
376
|
TeamOrPersonBase<?> combAuthor = name.getCombinationAuthorship();
|
377
|
name.setCombinationAuthorship(getExistingAuthor(combAuthor, parsed));
|
378
|
if (combAuthor == name.getCombinationAuthorship()) {
|
379
|
replaceTeamMembers(combAuthor, parsed);
|
380
|
}
|
381
|
|
382
|
TeamOrPersonBase<?> exAuthor = name.getExCombinationAuthorship();
|
383
|
name.setExCombinationAuthorship(getExistingAuthor(exAuthor, parsed));
|
384
|
if (exAuthor == name.getExCombinationAuthorship()) {
|
385
|
replaceTeamMembers(exAuthor, parsed);
|
386
|
}
|
387
|
|
388
|
TeamOrPersonBase<?> basioAuthor = name.getBasionymAuthorship();
|
389
|
name.setBasionymAuthorship(getExistingAuthor(basioAuthor, parsed));
|
390
|
if (basioAuthor == name.getBasionymAuthorship()) {
|
391
|
replaceTeamMembers(basioAuthor, parsed);
|
392
|
}
|
393
|
|
394
|
TeamOrPersonBase<?> exBasioAuthor = name.getExBasionymAuthorship();
|
395
|
name.setExBasionymAuthorship(getExistingAuthor(exBasioAuthor, parsed));
|
396
|
if (exBasioAuthor == name.getExBasionymAuthorship()) {
|
397
|
replaceTeamMembers(exBasioAuthor, parsed);
|
398
|
}
|
399
|
|
400
|
Reference newNomRef = name.getNomenclaturalReference();
|
401
|
Reference newOrExistingNomRef = getExistingReference(newNomRef, parsed);
|
402
|
if (newNomRef != null) {
|
403
|
if (newOrExistingNomRef == newNomRef){
|
404
|
replaceReferenceRelatedData(newNomRef, parsed);
|
405
|
}else {
|
406
|
name.setNomenclaturalReference(newOrExistingNomRef);
|
407
|
}
|
408
|
}
|
409
|
}
|
410
|
|
411
|
public void replaceReferenceRelatedData(Reference ref, boolean parsed) {
|
412
|
|
413
|
//author
|
414
|
TeamOrPersonBase<?> newAuthor = ref.getAuthorship();
|
415
|
TeamOrPersonBase<?> newOrExistingAuthor = getExistingAuthor(newAuthor, parsed);
|
416
|
if (newAuthor != null) {
|
417
|
if (newOrExistingAuthor == newAuthor) {
|
418
|
replaceTeamMembers(newAuthor, parsed);
|
419
|
}else {
|
420
|
ref.setAuthorship(newOrExistingAuthor);
|
421
|
}
|
422
|
}
|
423
|
|
424
|
//in-ref
|
425
|
Reference newInRef = ref.getInReference();
|
426
|
Reference newOrExistingInRef = getExistingReference(newInRef, parsed);
|
427
|
if (newInRef != null) {
|
428
|
if (newOrExistingInRef == newInRef){
|
429
|
replaceReferenceRelatedData(newInRef, parsed);
|
430
|
}else {
|
431
|
ref.setInReference(newOrExistingInRef);
|
432
|
}
|
433
|
}
|
434
|
}
|
435
|
|
436
|
private void replaceTeamMembers(TeamOrPersonBase<?> teamOrPerson, boolean parsed) {
|
437
|
if (teamOrPerson != null && teamOrPerson.isInstanceOf(Team.class)) {
|
438
|
Team team = CdmBase.deproxy(teamOrPerson, Team.class);
|
439
|
|
440
|
for (int i = 0; i < team.getTeamMembers().size(); i++) {
|
441
|
Person person = team.getTeamMembers().get(i);
|
442
|
team.getTeamMembers().set(i, getExistingAuthor(person, parsed));
|
443
|
}
|
444
|
}
|
445
|
}
|
446
|
|
447
|
public <T extends TeamOrPersonBase<?>> T getExistingAuthor(T author, boolean parsed) {
|
448
|
if (author == null){
|
449
|
return null;
|
450
|
}else{
|
451
|
init(personDedupInfo);
|
452
|
init(teamDedupInfo);
|
453
|
initAuthorTitleCaches(author);
|
454
|
T result = getTeamOrPerson(author, parsed);
|
455
|
if (result == null){
|
456
|
putAgentBase(author.getTitleCache(), author);
|
457
|
if (author.isInstanceOf(Team.class)){
|
458
|
handleTeam(CdmBase.deproxy(author, Team.class), parsed);
|
459
|
}
|
460
|
result = author;
|
461
|
}
|
462
|
return result;
|
463
|
}
|
464
|
}
|
465
|
|
466
|
private <T extends TeamOrPersonBase<?>> void initAuthorTitleCaches(T teamOrPerson) {
|
467
|
if (teamOrPerson == null) {
|
468
|
return;
|
469
|
}
|
470
|
//NOTE: this is more or less redundant copy from CdmPreDataChangeListener
|
471
|
if (teamOrPerson.isInstanceOf(Team.class)){
|
472
|
Team team = CdmBase.deproxy(teamOrPerson, Team.class);
|
473
|
if (!team.isProtectedNomenclaturalTitleCache()){
|
474
|
team.setNomenclaturalTitleCache(null, false);
|
475
|
}
|
476
|
if (!team.isProtectedCollectorTitleCache()){
|
477
|
team.setCollectorTitleCache(null, false);
|
478
|
}
|
479
|
//not redundant part
|
480
|
for (Person member : team.getTeamMembers()) {
|
481
|
initAuthorTitleCaches(member);
|
482
|
}
|
483
|
//end not redundant part
|
484
|
}
|
485
|
teamOrPerson.getNomenclaturalTitleCache();
|
486
|
teamOrPerson.getCollectorTitleCache();
|
487
|
if (! teamOrPerson.isProtectedTitleCache()){
|
488
|
teamOrPerson.setTitleCache(teamOrPerson.generateTitle(), false);
|
489
|
}
|
490
|
}
|
491
|
|
492
|
private void initReferenceCaches(Reference ref) {
|
493
|
if (ref == null) {
|
494
|
return;
|
495
|
}
|
496
|
////TODO better do via matching strategy (newReference might have caches == null)
|
497
|
//the below is more or less a copy from CdmPreDataChangeListener (except for inReference handling)
|
498
|
ref.getAbbrevTitleCache();
|
499
|
ref.getTitleCache();
|
500
|
|
501
|
initAuthorTitleCaches(ref.getAuthorship());
|
502
|
initReferenceCaches(ref.getInReference());
|
503
|
}
|
504
|
|
505
|
public AgentBase<?> getExistingAgent(AgentBase<?> agent, boolean parsed) {
|
506
|
if (agent == null){
|
507
|
return null;
|
508
|
} else if (agent.isInstanceOf(TeamOrPersonBase.class)){
|
509
|
return getExistingAuthor(CdmBase.deproxy(agent, TeamOrPersonBase.class), parsed);
|
510
|
}else{
|
511
|
throw new RuntimeException("Institution matching not yet implemented");
|
512
|
// initInstitutionMap();
|
513
|
// Set<Institution> result = institutionMap.get(agent.getTitleCache());
|
514
|
// if (result == null){
|
515
|
// result = putEntity(agent.getTitleCache(), CdmBase.deproxy(agent, Institution.class), institutionMap);
|
516
|
// }
|
517
|
// return result;
|
518
|
}
|
519
|
}
|
520
|
|
521
|
private <S extends IMatchable> void init(DedupInfo<S> dedupInfo) {
|
522
|
dedupInfo.status = init(dedupInfo.clazz, dedupInfo.status, dedupInfo.map);
|
523
|
}
|
524
|
|
525
|
private <S extends IMatchable> Status init(Class<S> clazz, Status status, Map<String,Set<S>> map) {
|
526
|
|
527
|
//FIXME cast
|
528
|
Class<IdentifiableEntity> entityClass = (Class)clazz;
|
529
|
if (status == Status.NOT_INIT && repository != null){
|
530
|
if (maxCountFullLoad != NEVER_USE_MAP){
|
531
|
long nExisting = -2;
|
532
|
if (maxCountFullLoad != ALWAYS_USE_MAP){
|
533
|
nExisting = repository.getCommonService().count(entityClass);
|
534
|
}
|
535
|
if (nExisting <= maxCountFullLoad ){
|
536
|
List<String> propertyPaths = Arrays.asList("");
|
537
|
List<IdentifiableEntity> existingEntities = repository.getCommonService().list(entityClass, null, null, null, propertyPaths);
|
538
|
for (IdentifiableEntity<?> entity : existingEntities){
|
539
|
//TODO casting
|
540
|
putEntity(entity.getTitleCache(), entity, (Map)map);
|
541
|
}
|
542
|
return Status.USE_MAP;
|
543
|
}else{
|
544
|
return Status.USE_REPO;
|
545
|
}
|
546
|
}else{
|
547
|
return Status.USE_REPO;
|
548
|
}
|
549
|
}
|
550
|
return status;
|
551
|
}
|
552
|
|
553
|
private void handleTeam(Team team, boolean parsed) {
|
554
|
List<Person> members = team.getTeamMembers();
|
555
|
for (int i =0; i< members.size(); i++){
|
556
|
Person person = CdmBase.deproxy(members.get(i));
|
557
|
Person existingPerson = getMatchingEntity(person, personDedupInfo, parsed).orElse(null);
|
558
|
if (existingPerson != null){
|
559
|
members.set(i, existingPerson);
|
560
|
}else{
|
561
|
putAgentBase(person.getTitleCache(), person);
|
562
|
}
|
563
|
}
|
564
|
}
|
565
|
|
566
|
public Collection getExistingCollection(Collection collection) {
|
567
|
if (collection == null){
|
568
|
return null;
|
569
|
}else{
|
570
|
initCollectionMap();
|
571
|
Collection result = getMatchingCollections(collection).orElse(null);
|
572
|
if (result == null){
|
573
|
result = collection;
|
574
|
putEntity(result.getTitleCache(), result, collectionMap);
|
575
|
}else{
|
576
|
if(logger.isDebugEnabled()) {
|
577
|
logger.debug("Matches");
|
578
|
}
|
579
|
}
|
580
|
return result;
|
581
|
}
|
582
|
}
|
583
|
|
584
|
private void initCollectionMap() {
|
585
|
if (collectionStatus == Status.NOT_INIT && repository != null){
|
586
|
List<String> propertyPaths = Arrays.asList("");
|
587
|
List<Collection> existingCollections = repository.getCollectionService().list(null, null, null, null, propertyPaths);
|
588
|
for (Collection collection : existingCollections){
|
589
|
putEntity(collection.getTitleCache(), collection, collectionMap);
|
590
|
}
|
591
|
}
|
592
|
collectionStatus = Status.USE_MAP;
|
593
|
// collectionStatus = init(Collection.class, collectionStatus, collectionMap); //for future, once Collection becomes IMatchable
|
594
|
}
|
595
|
|
596
|
/**
|
597
|
* Returns an existing matching persistend reference or the the given reference
|
598
|
* if no matching reference exists.
|
599
|
* @param ref given reference
|
600
|
* @param parsed if <code>true</code> use matching strategy for parsed references,
|
601
|
* the default matching strategy otherwise
|
602
|
* @return matching reference
|
603
|
*/
|
604
|
public Reference getExistingReference(Reference ref, boolean parsed) {
|
605
|
if (ref == null){
|
606
|
return null;
|
607
|
}else{
|
608
|
init(referenceDedupInfo);
|
609
|
initReferenceCaches(ref);
|
610
|
Reference result = getMatchingEntity(ref, referenceDedupInfo, parsed).orElse(null);
|
611
|
if (result == null){
|
612
|
result = ref;
|
613
|
Reference inRef = result.getInReference();
|
614
|
if (inRef != null){
|
615
|
result.setInReference(getExistingReference(result.getInReference(), parsed));
|
616
|
}
|
617
|
putEntity(result.getTitleCache(), result, referenceDedupInfo.map);
|
618
|
}else{
|
619
|
if(logger.isDebugEnabled()) {logger.debug("Matches");}
|
620
|
}
|
621
|
return result;
|
622
|
}
|
623
|
}
|
624
|
|
625
|
public TaxonName getExistingName(TaxonName name, boolean parsed) {
|
626
|
if (name == null){
|
627
|
return null;
|
628
|
}else{
|
629
|
init(nameDedupInfo);
|
630
|
TaxonName result = getMatchingEntity(name, nameDedupInfo, parsed).orElse(null);
|
631
|
if (result == null){
|
632
|
result = name;
|
633
|
Set<HybridRelationship> parentRelations = result.getHybridChildRelations();
|
634
|
for (HybridRelationship rel : parentRelations){
|
635
|
TaxonName parent = rel.getParentName();
|
636
|
if (parent != null){
|
637
|
rel.setParentName(getExistingName(parent, parsed));
|
638
|
}
|
639
|
}
|
640
|
putEntity(result.getTitleCache(), result, nameDedupInfo.map);
|
641
|
}else{
|
642
|
if(logger.isDebugEnabled()) {
|
643
|
logger.debug("Matches");
|
644
|
}
|
645
|
}
|
646
|
return result;
|
647
|
}
|
648
|
}
|
649
|
|
650
|
public Rights getExistingCopyright(Rights right) {
|
651
|
if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){
|
652
|
return null;
|
653
|
}else{
|
654
|
initCopyrightMap();
|
655
|
String key = makeCopyrightKey(right);
|
656
|
Set<Rights> set = copyrightMap.get(key);
|
657
|
if (set == null || set.isEmpty()){
|
658
|
putCopyright(key, right);
|
659
|
return right;
|
660
|
}else if (set.size()>1){
|
661
|
//TODO
|
662
|
logger.warn("More than 1 matching copyright not yet handled for key: " + key);
|
663
|
}
|
664
|
return set.iterator().next();
|
665
|
}
|
666
|
}
|
667
|
|
668
|
private void initCopyrightMap() {
|
669
|
if (copyrightStatus == Status.NOT_INIT && repository != null){
|
670
|
List<String> propertyPaths = Arrays.asList("");
|
671
|
List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths);
|
672
|
for (Rights right : existingRights){
|
673
|
if (RightsType.COPYRIGHT().equals(right.getType())){
|
674
|
putCopyright(makeCopyrightKey(right), right);
|
675
|
}
|
676
|
}
|
677
|
copyrightStatus = Status.USE_MAP;
|
678
|
}
|
679
|
}
|
680
|
|
681
|
private void putCopyright(String key, Rights right) {
|
682
|
Set<Rights> rights = copyrightMap.get(key);
|
683
|
if (rights == null){
|
684
|
rights = new HashSet<>();
|
685
|
copyrightMap.put(key, rights);
|
686
|
}
|
687
|
rights.add(CdmBase.deproxy(right));
|
688
|
}
|
689
|
|
690
|
private String makeCopyrightKey(Rights right) {
|
691
|
if (right.getAgent() != null){
|
692
|
return right.getAgent().getTitleCache();
|
693
|
}else if (right.getText() != null){
|
694
|
return right.getText();
|
695
|
}else {
|
696
|
logger.warn("Key for copyright could not be created: " + right);
|
697
|
return right.getUuid().toString();
|
698
|
}
|
699
|
}
|
700
|
|
701
|
}
|