1
|
/**
|
2
|
* Copyright (C) 2017 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.common.utils;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.HashSet;
|
14
|
import java.util.List;
|
15
|
import java.util.Map;
|
16
|
import java.util.Optional;
|
17
|
import java.util.Set;
|
18
|
import java.util.UUID;
|
19
|
import java.util.function.Predicate;
|
20
|
|
21
|
import org.apache.log4j.Logger;
|
22
|
|
23
|
import eu.etaxonomy.cdm.api.application.ICdmRepository;
|
24
|
import eu.etaxonomy.cdm.api.service.IService;
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.common.ImportResult;
|
27
|
import eu.etaxonomy.cdm.io.common.ImportStateBase;
|
28
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
29
|
import eu.etaxonomy.cdm.model.agent.Institution;
|
30
|
import eu.etaxonomy.cdm.model.agent.Person;
|
31
|
import eu.etaxonomy.cdm.model.agent.Team;
|
32
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
33
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
34
|
import eu.etaxonomy.cdm.model.common.ICdmBase;
|
35
|
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
|
36
|
import eu.etaxonomy.cdm.model.media.Rights;
|
37
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
38
|
import eu.etaxonomy.cdm.model.name.HybridRelationship;
|
39
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
40
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
41
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
42
|
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
|
43
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
44
|
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
|
45
|
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual;
|
46
|
import eu.etaxonomy.cdm.strategy.match.IMatchable;
|
47
|
import eu.etaxonomy.cdm.strategy.match.MatchException;
|
48
|
import eu.etaxonomy.cdm.strategy.match.MatchMode;
|
49
|
|
50
|
/**
|
51
|
* Helper class for deduplicating authors, references, names, etc.
|
52
|
* during import.
|
53
|
*
|
54
|
* Note 2021: This was originally used as a fast deduplication tool for command-line imports
|
55
|
* into empty databases. It is currently being transformed into a deduplication tool that
|
56
|
* can be used during application-based imports.
|
57
|
*
|
58
|
* @author a.mueller
|
59
|
* @since 11.02.2017
|
60
|
*/
|
61
|
public class ImportDeduplicationHelper {
|
62
|
|
63
|
private static final Logger logger = Logger.getLogger(ImportDeduplicationHelper.class);
|
64
|
|
65
|
private ICdmRepository repository;
|
66
|
|
67
|
//for possible future use
|
68
|
@SuppressWarnings("unused")
|
69
|
private ImportStateBase<?,?> state;
|
70
|
|
71
|
public static final int NEVER_USE_MAP = 0;
|
72
|
public static final int ALWAYS_USE_MAP = -1;
|
73
|
//should deduplication use maps indexing the full database content? If yes, what is the maximum number of records for this.
|
74
|
//If more records exist deduplication is done on the fly.
|
75
|
//0 = never use map
|
76
|
//-1 = always use map
|
77
|
private int maxCountFullLoad = ALWAYS_USE_MAP;
|
78
|
public int getMaxCountFullLoad() {
|
79
|
return maxCountFullLoad;
|
80
|
}
|
81
|
public void setMaxCountFullLoad(int maxCountFullLoad) {
|
82
|
this.maxCountFullLoad = maxCountFullLoad;
|
83
|
}
|
84
|
|
85
|
private enum Status{
|
86
|
NOT_INIT,
|
87
|
USE_MAP,
|
88
|
USE_REPO;
|
89
|
}
|
90
|
|
91
|
private class DedupInfo<S extends IMatchable>{
|
92
|
Class<S> clazz;
|
93
|
IMatchStrategyEqual matcher;
|
94
|
Map<String, Set<S>> map = new HashMap<>();
|
95
|
Status status = Status.NOT_INIT;
|
96
|
|
97
|
@SuppressWarnings("unchecked")
|
98
|
private DedupInfo(Class<S> clazz, DedupMap dedupMap){
|
99
|
this.clazz = clazz;
|
100
|
if (IMatchable.class.isAssignableFrom(clazz)) {
|
101
|
matcher = DefaultMatchStrategy.NewInstance(clazz);
|
102
|
}
|
103
|
dedupMap.put(clazz, this);
|
104
|
}
|
105
|
@Override
|
106
|
public String toString() {
|
107
|
return clazz.getSimpleName() + ":" + status.name()+":mapsize=" + map.size()+":"+ (matcher == null?"without":"with") + " matcher";
|
108
|
}
|
109
|
}
|
110
|
|
111
|
private class DedupMap<T extends IMatchable> extends HashMap<Class<T>, DedupInfo<T>>{
|
112
|
private static final long serialVersionUID = 3757206594833330646L;
|
113
|
}
|
114
|
private DedupMap<? extends IdentifiableEntity> dedupMap = new DedupMap<>();
|
115
|
|
116
|
private DedupInfo<Reference> referenceDedupInfo = new DedupInfo<>(Reference.class, dedupMap);
|
117
|
private DedupInfo<Person> personDedupInfo = new DedupInfo<>(Person.class, dedupMap);
|
118
|
private DedupInfo<Team> teamDedupInfo = new DedupInfo<>(Team.class, dedupMap);
|
119
|
private DedupInfo<TaxonName> nameDedupInfo = new DedupInfo<>(TaxonName.class, dedupMap);
|
120
|
|
121
|
|
122
|
@SuppressWarnings("unused")
|
123
|
private Status institutionStatus = Status.NOT_INIT;
|
124
|
private Status copyrightStatus = Status.NOT_INIT;
|
125
|
private Status collectionStatus = Status.NOT_INIT;
|
126
|
|
127
|
private Map<String, Set<Institution>> institutionMap = new HashMap<>();
|
128
|
//using titleCache
|
129
|
private Map<String, Set<Rights>> copyrightMap = new HashMap<>();
|
130
|
private Map<String, Set<Collection>> collectionMap = new HashMap<>();
|
131
|
|
132
|
/**
|
133
|
* Clears all internal maps.
|
134
|
*/
|
135
|
public void reset() {
|
136
|
dedupMap.values().forEach(di->di.map.clear());
|
137
|
institutionMap.clear();
|
138
|
copyrightMap.clear();
|
139
|
collectionMap.clear();
|
140
|
}
|
141
|
|
142
|
// private IMatchStrategy collectionMatcher = DefaultMatchStrategy.NewInstance(Collection.class);
|
143
|
|
144
|
// ************************** FACTORY *******************************/
|
145
|
|
146
|
public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper NewInstance(ICdmRepository repository, STATE state){
|
147
|
return new ImportDeduplicationHelper(repository, state);
|
148
|
}
|
149
|
|
150
|
// ************************ CONSTRUCTOR *****************************/
|
151
|
|
152
|
private ImportDeduplicationHelper(ICdmRepository repository, ImportStateBase<?,?> state) {
|
153
|
this.repository = repository;
|
154
|
if (repository == null){
|
155
|
logger.warn("Repository is null. Deduplication does not work against database.");
|
156
|
}
|
157
|
if (state == null){
|
158
|
logger.warn("State is null. Deduplication works without state.");
|
159
|
}
|
160
|
this.state = state;
|
161
|
try {
|
162
|
dedupMap.get(Reference.class).matcher.setMatchMode("title", MatchMode.EQUAL);
|
163
|
dedupMap.get(Team.class).matcher.setMatchMode("nomenclaturalTitleCache", MatchMode.EQUAL_OR_SECOND_NULL);
|
164
|
} catch (MatchException e) {
|
165
|
throw new RuntimeException(e); //should not happen
|
166
|
}
|
167
|
}
|
168
|
|
169
|
public void restartSession(){
|
170
|
restartSession(repository, null);
|
171
|
}
|
172
|
|
173
|
/**
|
174
|
* Clears all internal maps and loads them with same data as before but in current session.
|
175
|
*/
|
176
|
public void restartSession(ICdmRepository repository, ImportResult importResult){
|
177
|
if (repository == null){
|
178
|
return;
|
179
|
}
|
180
|
referenceDedupInfo.map = refreshSetMap(referenceDedupInfo.map, (IService)repository.getReferenceService(), importResult);
|
181
|
personDedupInfo.map = refreshSetMap(personDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
182
|
teamDedupInfo.map = refreshSetMap(teamDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
183
|
institutionMap = refreshSetMap(institutionMap, (IService)repository.getAgentService(), importResult);
|
184
|
|
185
|
nameDedupInfo.map = refreshSetMap(nameDedupInfo.map, (IService)repository.getNameService(), importResult);
|
186
|
collectionMap = refreshSetMap(collectionMap, (IService)repository.getCollectionService(), importResult);
|
187
|
copyrightMap = refreshSetMap(copyrightMap, (IService)repository.getRightsService(), importResult);
|
188
|
}
|
189
|
|
190
|
//maybe this was used for Institution before
|
191
|
private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap,
|
192
|
IService<T> service, ImportResult importResult) {
|
193
|
|
194
|
Map<String, T> newMap = new HashMap<>();
|
195
|
for (String key : oldMap.keySet()){
|
196
|
T old = oldMap.get(key);
|
197
|
if (old!= null){
|
198
|
T cdmBase = service.find(old.getUuid());
|
199
|
if (cdmBase == null){
|
200
|
String message = "No cdm object was found for uuid " + old.getUuid() + " of class " + old.getClass().getSimpleName();
|
201
|
importResult.addWarning(message);
|
202
|
}else{
|
203
|
newMap.put(key, CdmBase.deproxy(cdmBase));
|
204
|
}
|
205
|
}else{
|
206
|
String message = "Value for key " + key + " was null in deduplication map";
|
207
|
importResult.addWarning(message);
|
208
|
}
|
209
|
}
|
210
|
return newMap;
|
211
|
}
|
212
|
|
213
|
private <T extends ICdmBase> Map<String, Set<T>> refreshSetMap(Map<String, Set<T>> oldMap,
|
214
|
IService<T> service, ImportResult importResult) {
|
215
|
|
216
|
Map<String, Set<T>> newMap = new HashMap<>();
|
217
|
//create UUID set
|
218
|
Set<UUID> uuidSet = new HashSet<>();
|
219
|
for (String key : oldMap.keySet()){
|
220
|
Set<T> oldSet = oldMap.get(key);
|
221
|
for (T item : oldSet){
|
222
|
UUID uuid = item.getUuid();
|
223
|
uuidSet.add(uuid);
|
224
|
}
|
225
|
}
|
226
|
//create uuid-item map
|
227
|
Map<UUID, T> itemMap = new HashMap<>();
|
228
|
List<T> list = service.find(uuidSet);
|
229
|
for (T item : list){
|
230
|
itemMap.put(item.getUuid(), item);
|
231
|
}
|
232
|
//refresh
|
233
|
for (String key : oldMap.keySet()){
|
234
|
Set<T> oldSet = oldMap.get(key);
|
235
|
Set<T> newSet = new HashSet<>();
|
236
|
if (oldSet != null){
|
237
|
newMap.put(key, newSet);
|
238
|
for (T item : oldSet){
|
239
|
T cdmBase = CdmBase.deproxy(itemMap.get(item.getUuid()));
|
240
|
if (cdmBase == null){
|
241
|
String message = "No cdm object was found for uuid " + item.getUuid() + " of class " + item.getClass().getSimpleName();
|
242
|
importResult.addWarning(message);
|
243
|
}else{
|
244
|
newSet.add(cdmBase);
|
245
|
}
|
246
|
}
|
247
|
}else{
|
248
|
String message = "Value for key " + key + " was null in deduplication map";
|
249
|
importResult.addWarning(message);
|
250
|
}
|
251
|
}
|
252
|
return newMap;
|
253
|
}
|
254
|
|
255
|
//************************ PUTTER / GETTER *****************************/
|
256
|
|
257
|
//ENTITY
|
258
|
private <S extends IdentifiableEntity<?>> void putEntity(String title, S entity, Map<String,Set<S>> map){
|
259
|
Set<S> entitySet = map.get(title);
|
260
|
if (entitySet == null){
|
261
|
entitySet = new HashSet<>();
|
262
|
map.put(title, entitySet);
|
263
|
}
|
264
|
entitySet.add(CdmBase.deproxy(entity));
|
265
|
}
|
266
|
|
267
|
private <S extends IMatchable> Set<S> getEntityByTitle(String title, DedupInfo<S> dedupInfo){
|
268
|
return dedupInfo.map.get(title);
|
269
|
}
|
270
|
|
271
|
private <S extends IMatchable> Optional<S> getMatchingEntity(S entityOrig, DedupInfo<S> dedupInfo){
|
272
|
S entity = CdmBase.deproxy(entityOrig);
|
273
|
Predicate<S> matchFilter = reference ->{
|
274
|
try {
|
275
|
return dedupInfo.matcher.invoke((IMatchable)reference, (IMatchable)entity).isSuccessful();
|
276
|
} catch (MatchException e) {
|
277
|
throw new RuntimeException(e);
|
278
|
}
|
279
|
};
|
280
|
//TODO casting
|
281
|
Optional<S> result = Optional.ofNullable(getEntityByTitle(((IdentifiableEntity<?>)entity).getTitleCache(), dedupInfo))
|
282
|
.orElse(new HashSet<>())
|
283
|
.stream()
|
284
|
.filter(matchFilter)
|
285
|
.findAny();
|
286
|
if (result.isPresent() || dedupInfo.status == Status.USE_MAP || repository == null){
|
287
|
return result;
|
288
|
}else {
|
289
|
try {
|
290
|
return (Optional)repository.getCommonService().findMatching((IMatchable)entity, dedupInfo.matcher).stream().findFirst();
|
291
|
} catch (MatchException e) {
|
292
|
throw new RuntimeException(e);
|
293
|
}
|
294
|
}
|
295
|
}
|
296
|
|
297
|
// AGENTS
|
298
|
private void putAgentBase(String title, AgentBase<?> agent){
|
299
|
if (agent.isInstanceOf(Person.class) ){
|
300
|
putEntity(title, CdmBase.deproxy(agent, Person.class), personDedupInfo.map);
|
301
|
}else if (agent.isInstanceOf(Team.class)){
|
302
|
putEntity(title, CdmBase.deproxy(agent, Team.class), teamDedupInfo.map);
|
303
|
}else{
|
304
|
putEntity(title, CdmBase.deproxy(agent, Institution.class), institutionMap);
|
305
|
}
|
306
|
}
|
307
|
|
308
|
private <T extends TeamOrPersonBase<?>> T getTeamOrPerson(T agent){
|
309
|
T result = agent;
|
310
|
if (agent.isInstanceOf(Person.class)){
|
311
|
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Person.class), personDedupInfo).orElse(null) ; // personMap.get(title);
|
312
|
}else if (agent.isInstanceOf(Team.class)) {
|
313
|
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Team.class), teamDedupInfo).orElse(null); // teamMap.get(title);
|
314
|
}
|
315
|
return result;
|
316
|
}
|
317
|
|
318
|
//COLLECTIONS
|
319
|
private Set<Collection> getCollections(String title){
|
320
|
return collectionMap.get(title);
|
321
|
}
|
322
|
|
323
|
private Optional<Collection> getMatchingCollections(Collection existing){
|
324
|
Predicate<Collection> matchFilter = collection ->{
|
325
|
// try {
|
326
|
//TODO right Collection matching
|
327
|
if (CdmUtils.nullSafeEqual(collection.getName(), existing.getName())
|
328
|
&& CdmUtils.nullSafeEqual(collection.getCode(), existing.getCode())){
|
329
|
return true;
|
330
|
}else{
|
331
|
return false;
|
332
|
}
|
333
|
// return collectionMatcher.invoke(collection, existing);
|
334
|
// } catch (MatchException e) {
|
335
|
// throw new RuntimeException(e);
|
336
|
// }
|
337
|
};
|
338
|
return Optional.ofNullable(getCollections(existing.getTitleCache()))
|
339
|
.orElse(new HashSet<>())
|
340
|
.stream()
|
341
|
.filter(matchFilter)
|
342
|
.findAny();
|
343
|
}
|
344
|
|
345
|
// **************************** METHODS *****************************/
|
346
|
|
347
|
/**
|
348
|
* This method replaces name authors, nomenclatural reference and
|
349
|
* nomenclatural reference author by existing authors and references
|
350
|
* if matching authors or references exist. If not, the given authors
|
351
|
* and references are added to the map of existing entities.
|
352
|
*
|
353
|
* @param state the import state
|
354
|
* @param name the name with authors and references to replace
|
355
|
*/
|
356
|
public void replaceAuthorNamesAndNomRef(INonViralName name) {
|
357
|
|
358
|
TeamOrPersonBase<?> combAuthor = name.getCombinationAuthorship();
|
359
|
name.setCombinationAuthorship(getExistingAuthor(combAuthor));
|
360
|
|
361
|
TeamOrPersonBase<?> exAuthor = name.getExCombinationAuthorship();
|
362
|
name.setExCombinationAuthorship(getExistingAuthor(exAuthor));
|
363
|
|
364
|
TeamOrPersonBase<?> basioAuthor = name.getBasionymAuthorship();
|
365
|
name.setBasionymAuthorship(getExistingAuthor(basioAuthor));
|
366
|
|
367
|
TeamOrPersonBase<?> exBasioAuthor = name.getExBasionymAuthorship();
|
368
|
name.setExBasionymAuthorship(getExistingAuthor(exBasioAuthor));
|
369
|
|
370
|
INomenclaturalReference nomRef = name.getNomenclaturalReference();
|
371
|
if (nomRef != null){
|
372
|
TeamOrPersonBase<?> refAuthor = nomRef.getAuthorship();
|
373
|
nomRef.setAuthorship(getExistingAuthor(refAuthor));
|
374
|
|
375
|
Reference existingRef = getExistingReference((Reference)nomRef);
|
376
|
//TODO AM: why do we need to check null here (we don't do this for authors, maybe because it is an original source?)
|
377
|
if (existingRef != null){
|
378
|
name.setNomenclaturalReference(existingRef);
|
379
|
}
|
380
|
}
|
381
|
}
|
382
|
|
383
|
public void replaceReferenceRelatedData(Reference ref) {
|
384
|
|
385
|
TeamOrPersonBase<?> author = ref.getAuthorship();
|
386
|
ref.setAuthorship(getExistingAuthor(author));
|
387
|
|
388
|
ref.setInReference(getExistingReference(ref.getInReference()));
|
389
|
}
|
390
|
|
391
|
public <T extends TeamOrPersonBase<?>> T getExistingAuthor(T author) {
|
392
|
if (author == null){
|
393
|
return null;
|
394
|
}else{
|
395
|
init(personDedupInfo);
|
396
|
init(teamDedupInfo);
|
397
|
initAuthorTitleCaches(author);
|
398
|
T result = getTeamOrPerson(author);
|
399
|
if (result == null){
|
400
|
putAgentBase(author.getTitleCache(), author);
|
401
|
if (author.isInstanceOf(Team.class)){
|
402
|
handleTeam(CdmBase.deproxy(author, Team.class));
|
403
|
}
|
404
|
result = author;
|
405
|
}
|
406
|
return result;
|
407
|
}
|
408
|
}
|
409
|
|
410
|
private <T extends TeamOrPersonBase<?>> void initAuthorTitleCaches(T teamOrPerson) {
|
411
|
//NOTE: this is more or less redundant copy from CdmPreDataChangeListener
|
412
|
if (teamOrPerson.isInstanceOf(Team.class)){
|
413
|
Team team = CdmBase.deproxy(teamOrPerson, Team.class);
|
414
|
if (!team.isProtectedNomenclaturalTitleCache()){
|
415
|
team.setNomenclaturalTitleCache(null, false);
|
416
|
}
|
417
|
if (!team.isProtectedCollectorTitleCache()){
|
418
|
team.setCollectorTitleCache(null, false);
|
419
|
}
|
420
|
}
|
421
|
teamOrPerson.getNomenclaturalTitleCache();
|
422
|
teamOrPerson.getCollectorTitleCache();
|
423
|
if (! teamOrPerson.isProtectedTitleCache()){
|
424
|
teamOrPerson.setTitleCache(teamOrPerson.generateTitle(), false);
|
425
|
}
|
426
|
}
|
427
|
|
428
|
private void initReferenceCaches(Reference ref) {
|
429
|
////TODO better do via matching strategy (newReference might have caches == null)
|
430
|
//the below is more or less a copy from CdmPreDataChangeListener
|
431
|
ref.getAbbrevTitleCache();
|
432
|
ref.getTitleCache();
|
433
|
}
|
434
|
|
435
|
public AgentBase<?> getExistingAgent(AgentBase<?> agent) {
|
436
|
if (agent == null){
|
437
|
return null;
|
438
|
} else if (agent.isInstanceOf(TeamOrPersonBase.class)){
|
439
|
return getExistingAuthor(CdmBase.deproxy(agent, TeamOrPersonBase.class));
|
440
|
}else{
|
441
|
throw new RuntimeException("Institution matching not yet implemented");
|
442
|
// initInstitutionMap();
|
443
|
// Set<Institution> result = institutionMap.get(agent.getTitleCache());
|
444
|
// if (result == null){
|
445
|
// result = putEntity(agent.getTitleCache(), CdmBase.deproxy(agent, Institution.class), institutionMap);
|
446
|
// }
|
447
|
// return result;
|
448
|
}
|
449
|
}
|
450
|
|
451
|
private <S extends IMatchable> void init(DedupInfo<S> dedupInfo) {
|
452
|
dedupInfo.status = init(dedupInfo.clazz, dedupInfo.status, dedupInfo.map);
|
453
|
}
|
454
|
|
455
|
private <S extends IMatchable> Status init(Class<S> clazz, Status status, Map<String,Set<S>> map) {
|
456
|
|
457
|
Class<IdentifiableEntity> entityClass = (Class<IdentifiableEntity>)clazz;
|
458
|
if (status == Status.NOT_INIT && repository != null){
|
459
|
if (maxCountFullLoad != NEVER_USE_MAP){
|
460
|
long nExisting = -2;
|
461
|
if (maxCountFullLoad != ALWAYS_USE_MAP){
|
462
|
nExisting = repository.getCommonService().count(entityClass);
|
463
|
}
|
464
|
if (nExisting <= maxCountFullLoad ){
|
465
|
List<String> propertyPaths = Arrays.asList("");
|
466
|
List<IdentifiableEntity> existingEntities = repository.getCommonService().list(entityClass, null, null, null, propertyPaths);
|
467
|
for (IdentifiableEntity<?> entity : existingEntities){
|
468
|
//TODO casting
|
469
|
putEntity(entity.getTitleCache(), entity, (Map)map);
|
470
|
}
|
471
|
return Status.USE_MAP;
|
472
|
}else{
|
473
|
return Status.USE_REPO;
|
474
|
}
|
475
|
}else{
|
476
|
return Status.USE_REPO;
|
477
|
}
|
478
|
}
|
479
|
return status;
|
480
|
}
|
481
|
|
482
|
private void handleTeam(Team team) {
|
483
|
List<Person> members = team.getTeamMembers();
|
484
|
for (int i =0; i< members.size(); i++){
|
485
|
Person person = CdmBase.deproxy(members.get(i));
|
486
|
Person existingPerson = getMatchingEntity(person, personDedupInfo).orElse(null);
|
487
|
if (existingPerson != null){
|
488
|
members.set(i, existingPerson);
|
489
|
}else{
|
490
|
putAgentBase(person.getTitleCache(), person);
|
491
|
}
|
492
|
}
|
493
|
}
|
494
|
|
495
|
public Collection getExistingCollection(Collection collection) {
|
496
|
if (collection == null){
|
497
|
return null;
|
498
|
}else{
|
499
|
initCollectionMap();
|
500
|
Collection result = getMatchingCollections(collection).orElse(null);
|
501
|
if (result == null){
|
502
|
result = collection;
|
503
|
putEntity(result.getTitleCache(), result, collectionMap);
|
504
|
}else{
|
505
|
if(logger.isDebugEnabled()) {
|
506
|
logger.debug("Matches");
|
507
|
}
|
508
|
}
|
509
|
return result;
|
510
|
}
|
511
|
}
|
512
|
|
513
|
private void initCollectionMap() {
|
514
|
if (collectionStatus == Status.NOT_INIT && repository != null){
|
515
|
List<String> propertyPaths = Arrays.asList("");
|
516
|
List<Collection> existingCollections = repository.getCollectionService().list(null, null, null, null, propertyPaths);
|
517
|
for (Collection collection : existingCollections){
|
518
|
putEntity(collection.getTitleCache(), collection, collectionMap);
|
519
|
}
|
520
|
}
|
521
|
collectionStatus = Status.USE_MAP;
|
522
|
// collectionStatus = init(Collection.class, collectionStatus, collectionMap); //for future, once Collection becomes IMatchable
|
523
|
}
|
524
|
|
525
|
public Reference getExistingReference(Reference ref) {
|
526
|
if (ref == null){
|
527
|
return null;
|
528
|
}else{
|
529
|
init(referenceDedupInfo);
|
530
|
initReferenceCaches(ref);
|
531
|
Reference result = getMatchingEntity(ref, referenceDedupInfo).orElse(null);
|
532
|
if (result == null){
|
533
|
result = ref;
|
534
|
Reference inRef = result.getInReference();
|
535
|
if (inRef != null){
|
536
|
result.setInReference(getExistingReference(result.getInReference()));
|
537
|
}
|
538
|
putEntity(result.getTitleCache(), result, referenceDedupInfo.map);
|
539
|
}else{
|
540
|
if(logger.isDebugEnabled()) {logger.debug("Matches");}
|
541
|
}
|
542
|
return result;
|
543
|
}
|
544
|
}
|
545
|
|
546
|
public TaxonName getExistingName(TaxonName name) {
|
547
|
if (name == null){
|
548
|
return null;
|
549
|
}else{
|
550
|
init(nameDedupInfo);
|
551
|
TaxonName result = getMatchingEntity(name, nameDedupInfo).orElse(null);
|
552
|
if (result == null){
|
553
|
result = name;
|
554
|
Set<HybridRelationship> parentRelations = result.getHybridChildRelations();
|
555
|
for (HybridRelationship rel : parentRelations){
|
556
|
TaxonName parent = rel.getParentName();
|
557
|
if (parent != null){
|
558
|
rel.setParentName(getExistingName(parent));
|
559
|
}
|
560
|
}
|
561
|
putEntity(result.getTitleCache(), result, nameDedupInfo.map);
|
562
|
}else{
|
563
|
if(logger.isDebugEnabled()) {
|
564
|
logger.debug("Matches");
|
565
|
}
|
566
|
}
|
567
|
return result;
|
568
|
}
|
569
|
}
|
570
|
|
571
|
public Rights getExistingCopyright(Rights right) {
|
572
|
if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){
|
573
|
return null;
|
574
|
}else{
|
575
|
initCopyrightMap();
|
576
|
String key = makeCopyrightKey(right);
|
577
|
Set<Rights> set = copyrightMap.get(key);
|
578
|
if (set == null || set.isEmpty()){
|
579
|
putCopyright(key, right);
|
580
|
return right;
|
581
|
}else if (set.size()>1){
|
582
|
//TODO
|
583
|
logger.warn("More than 1 matching copyright not yet handled for key: " + key);
|
584
|
}
|
585
|
return set.iterator().next();
|
586
|
}
|
587
|
}
|
588
|
|
589
|
private void initCopyrightMap() {
|
590
|
if (copyrightStatus == Status.NOT_INIT && repository != null){
|
591
|
List<String> propertyPaths = Arrays.asList("");
|
592
|
List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths);
|
593
|
for (Rights right : existingRights){
|
594
|
if (RightsType.COPYRIGHT().equals(right.getType())){
|
595
|
putCopyright(makeCopyrightKey(right), right);
|
596
|
}
|
597
|
}
|
598
|
copyrightStatus = Status.USE_MAP;
|
599
|
}
|
600
|
}
|
601
|
|
602
|
private void putCopyright(String key, Rights right) {
|
603
|
Set<Rights> rights = copyrightMap.get(key);
|
604
|
if (rights == null){
|
605
|
rights = new HashSet<>();
|
606
|
copyrightMap.put(key, rights);
|
607
|
}
|
608
|
rights.add(CdmBase.deproxy(right));
|
609
|
}
|
610
|
|
611
|
private String makeCopyrightKey(Rights right) {
|
612
|
if (right.getAgent() != null){
|
613
|
return right.getAgent().getTitleCache();
|
614
|
}else if (right.getText() != null){
|
615
|
return right.getText();
|
616
|
}else {
|
617
|
logger.warn("Key for copyright could not be created: " + right);
|
618
|
return right.getUuid().toString();
|
619
|
}
|
620
|
}
|
621
|
|
622
|
}
|