1
|
/**
|
2
|
* Copyright (C) 2017 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.common.utils;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.HashSet;
|
14
|
import java.util.List;
|
15
|
import java.util.Map;
|
16
|
import java.util.Optional;
|
17
|
import java.util.Set;
|
18
|
import java.util.function.Predicate;
|
19
|
|
20
|
import org.apache.log4j.Logger;
|
21
|
|
22
|
import eu.etaxonomy.cdm.api.application.ICdmRepository;
|
23
|
import eu.etaxonomy.cdm.api.service.IService;
|
24
|
import eu.etaxonomy.cdm.io.common.ImportResult;
|
25
|
import eu.etaxonomy.cdm.io.common.ImportStateBase;
|
26
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
27
|
import eu.etaxonomy.cdm.model.agent.Institution;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
31
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
32
|
import eu.etaxonomy.cdm.model.common.ICdmBase;
|
33
|
import eu.etaxonomy.cdm.model.media.Rights;
|
34
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
35
|
import eu.etaxonomy.cdm.model.name.HybridRelationship;
|
36
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
37
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
38
|
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
|
39
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
40
|
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
|
41
|
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
|
42
|
import eu.etaxonomy.cdm.strategy.match.MatchException;
|
43
|
|
44
|
/**
|
45
|
* Helper class for deduplicating authors, references, names, etc.
|
46
|
* during import.
|
47
|
* @author a.mueller
|
48
|
* @date 11.02.2017
|
49
|
*
|
50
|
*/
|
51
|
public class ImportDeduplicationHelper<STATE extends ImportStateBase<?,?>> {
|
52
|
private static final Logger logger = Logger.getLogger(ImportDeduplicationHelper.class);
|
53
|
|
54
|
private ICdmRepository repository;
|
55
|
|
56
|
boolean referenceMapIsInitialized = false;
|
57
|
boolean nameMapIsInitialized = false;
|
58
|
boolean agentMapIsInitialized = false;
|
59
|
boolean copyrightMapIsInitialized = false;
|
60
|
|
61
|
private Map<String, Set<Reference>> refMap = new HashMap<>();
|
62
|
private Map<String, Team> teamMap = new HashMap<>();
|
63
|
private Map<String, Person> personMap = new HashMap<>();
|
64
|
private Map<String, Institution> institutionMap = new HashMap<>();
|
65
|
//using titleCache
|
66
|
private Map<String, Set<INonViralName>> nameMap = new HashMap<>();
|
67
|
private Map<String, Set<Rights>> copyrightMap = new HashMap<>();
|
68
|
|
69
|
|
70
|
private IMatchStrategy referenceMatcher = DefaultMatchStrategy.NewInstance(Reference.class);
|
71
|
private IMatchStrategy nameMatcher = DefaultMatchStrategy.NewInstance(TaxonName.class);
|
72
|
|
73
|
|
74
|
|
75
|
public void restartSession(){
|
76
|
restartSession(repository, null);
|
77
|
}
|
78
|
|
79
|
public void restartSession(ICdmRepository repository, ImportResult importResult){
|
80
|
if (repository == null){
|
81
|
return;
|
82
|
}
|
83
|
personMap = refreshMap(personMap, (IService)repository.getAgentService(), importResult);
|
84
|
teamMap = refreshMap(teamMap, (IService)repository.getAgentService(), importResult);
|
85
|
institutionMap = refreshMap(institutionMap, (IService)repository.getAgentService(), importResult);
|
86
|
}
|
87
|
|
88
|
|
89
|
/**
|
90
|
* @param oldMap
|
91
|
* @param service
|
92
|
* @return
|
93
|
*/
|
94
|
private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap,
|
95
|
IService<T> service, ImportResult importResult) {
|
96
|
Map<String, T> newMap = new HashMap<>();
|
97
|
for (String key : oldMap.keySet()){
|
98
|
T old = oldMap.get(key);
|
99
|
if (old!= null){
|
100
|
T cdmBase = service.find(old.getUuid());
|
101
|
if (cdmBase == null){
|
102
|
String message = "No cdm object was found for uuid " + old.getUuid() + " of class " + old.getClass().getSimpleName();
|
103
|
importResult.addWarning(message);
|
104
|
}else{
|
105
|
newMap.put(key, cdmBase);
|
106
|
}
|
107
|
}else{
|
108
|
String message = "Value for key " + key + " was null in deduplication map";
|
109
|
importResult.addWarning(message);
|
110
|
}
|
111
|
}
|
112
|
return newMap;
|
113
|
}
|
114
|
|
115
|
// ************************** FACTORY *******************************/
|
116
|
|
117
|
public static ImportDeduplicationHelper<?> NewInstance(ICdmRepository repository){
|
118
|
return new ImportDeduplicationHelper<>(repository);
|
119
|
}
|
120
|
|
121
|
public static ImportDeduplicationHelper<?> NewStandaloneInstance(){
|
122
|
return new ImportDeduplicationHelper<>(null);
|
123
|
}
|
124
|
|
125
|
/**
|
126
|
* @param repository
|
127
|
* @param state not used, only for correct casting of generics
|
128
|
* @return
|
129
|
*/
|
130
|
public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper<STATE> NewInstance(ICdmRepository repository, STATE state){
|
131
|
return new ImportDeduplicationHelper<>(repository);
|
132
|
}
|
133
|
|
134
|
// ************************ CONSTRUCTOR *****************************/
|
135
|
|
136
|
public ImportDeduplicationHelper(ICdmRepository repository) {
|
137
|
this.repository = repository;
|
138
|
if (repository == null){
|
139
|
logger.warn("Repository is null. Deduplication does not work against database");
|
140
|
}
|
141
|
}
|
142
|
|
143
|
//************************ PUTTER / GETTER *****************************/
|
144
|
|
145
|
//REFERENCES
|
146
|
private void putReference(String title, Reference ref){
|
147
|
Set<Reference> refs = refMap.get(title);
|
148
|
if (refs == null){
|
149
|
refs = new HashSet<>();
|
150
|
refMap.put(title, refs);
|
151
|
}
|
152
|
refs.add(ref);
|
153
|
}
|
154
|
private Set<Reference> getReferences(String title){
|
155
|
return refMap.get(title);
|
156
|
}
|
157
|
|
158
|
private Optional<Reference> getMatchingReference(Reference existing){
|
159
|
Predicate<Reference> matchFilter = reference ->{
|
160
|
try {
|
161
|
return referenceMatcher.invoke(reference, existing);
|
162
|
} catch (MatchException e) {
|
163
|
throw new RuntimeException(e);
|
164
|
}
|
165
|
};
|
166
|
return Optional.ofNullable(getReferences(existing.getTitleCache()))
|
167
|
.orElse(new HashSet<>())
|
168
|
.stream()
|
169
|
.filter(matchFilter)
|
170
|
.findAny();
|
171
|
}
|
172
|
|
173
|
// AGENTS
|
174
|
private void putAgentBase(String title, AgentBase<?> agent){
|
175
|
if (agent.isInstanceOf(Person.class) ){
|
176
|
personMap.put(title, CdmBase.deproxy(agent, Person.class));
|
177
|
}else if (agent.isInstanceOf(Team.class)){
|
178
|
teamMap.put(title, CdmBase.deproxy(agent, Team.class));
|
179
|
}else{
|
180
|
institutionMap.put(title, CdmBase.deproxy(agent, Institution.class));
|
181
|
}
|
182
|
}
|
183
|
|
184
|
private TeamOrPersonBase<?> getAgentBase(String title){
|
185
|
TeamOrPersonBase<?> result = personMap.get(title);
|
186
|
if (result == null){
|
187
|
result = teamMap.get(title);
|
188
|
}
|
189
|
return result;
|
190
|
}
|
191
|
|
192
|
private Person getPerson(String title){
|
193
|
return personMap.get(title);
|
194
|
}
|
195
|
|
196
|
//NAMES
|
197
|
private void putName(String title, INonViralName name){
|
198
|
Set<INonViralName> names = nameMap.get(title);
|
199
|
if (names == null){
|
200
|
names = new HashSet<>();
|
201
|
nameMap.put(title, names);
|
202
|
}
|
203
|
names.add(name);
|
204
|
}
|
205
|
private Set<INonViralName> getNames(String title){
|
206
|
return nameMap.get(title);
|
207
|
}
|
208
|
|
209
|
private Optional<INonViralName> getMatchingName(INonViralName existing){
|
210
|
Predicate<INonViralName> matchFilter = name ->{
|
211
|
try {
|
212
|
return nameMatcher.invoke(name, existing);
|
213
|
} catch (MatchException e) {
|
214
|
throw new RuntimeException(e);
|
215
|
}
|
216
|
};
|
217
|
return Optional.ofNullable(getNames(existing.getTitleCache()))
|
218
|
.orElse(new HashSet<>())
|
219
|
.stream()
|
220
|
.filter(matchFilter)
|
221
|
.findAny();
|
222
|
}
|
223
|
|
224
|
// **************************** METHODS *****************************/
|
225
|
|
226
|
/**
|
227
|
* This method replaces name authors, nomenclatural reference and
|
228
|
* nomenclatural reference author by existing authors and references
|
229
|
* if matching authors or references exist. If not, the given authors
|
230
|
* and references are added to the map of existing entities.
|
231
|
*
|
232
|
* @param state the import state
|
233
|
* @param name the name with authors and references to replace
|
234
|
*/
|
235
|
public void replaceAuthorNamesAndNomRef(STATE state,
|
236
|
INonViralName name) {
|
237
|
TeamOrPersonBase<?> combAuthor = name.getCombinationAuthorship();
|
238
|
name.setCombinationAuthorship(getExistingAuthor(state, combAuthor));
|
239
|
|
240
|
TeamOrPersonBase<?> exAuthor = name.getExCombinationAuthorship();
|
241
|
name.setExCombinationAuthorship(getExistingAuthor(state, exAuthor));
|
242
|
|
243
|
TeamOrPersonBase<?> basioAuthor = name.getBasionymAuthorship();
|
244
|
name.setBasionymAuthorship(getExistingAuthor(state, basioAuthor));
|
245
|
|
246
|
TeamOrPersonBase<?> exBasioAuthor = name.getExBasionymAuthorship();
|
247
|
name.setExBasionymAuthorship(getExistingAuthor(state, exBasioAuthor));
|
248
|
|
249
|
INomenclaturalReference nomRef = name.getNomenclaturalReference();
|
250
|
if (nomRef != null){
|
251
|
TeamOrPersonBase<?> refAuthor = nomRef.getAuthorship();
|
252
|
nomRef.setAuthorship(getExistingAuthor(state, refAuthor));
|
253
|
|
254
|
Reference existingRef = getExistingReference(state, (Reference)nomRef);
|
255
|
if (existingRef != null){
|
256
|
name.setNomenclaturalReference(existingRef);
|
257
|
}
|
258
|
}
|
259
|
}
|
260
|
|
261
|
/**
|
262
|
* @param state
|
263
|
* @param combAuthor
|
264
|
* @return
|
265
|
*/
|
266
|
public TeamOrPersonBase<?> getExistingAuthor(STATE state,
|
267
|
TeamOrPersonBase<?> author) {
|
268
|
if (author == null){
|
269
|
return null;
|
270
|
}else{
|
271
|
initAgentMap(state);
|
272
|
TeamOrPersonBase<?> result = getAgentBase(author.getTitleCache());
|
273
|
if (result == null){
|
274
|
putAgentBase(author.getTitleCache(), author);
|
275
|
if (author instanceof Team){
|
276
|
handleTeam(state, (Team)author);
|
277
|
}
|
278
|
result = author;
|
279
|
}
|
280
|
return result;
|
281
|
}
|
282
|
}
|
283
|
|
284
|
public AgentBase<?> getExistingAgent(STATE state,
|
285
|
AgentBase<?> agent) {
|
286
|
if (agent == null){
|
287
|
return null;
|
288
|
} else if (agent.isInstanceOf(TeamOrPersonBase.class)){
|
289
|
return getExistingAuthor(state, CdmBase.deproxy(agent, TeamOrPersonBase.class));
|
290
|
}else{
|
291
|
initAgentMap(state);
|
292
|
Institution result = institutionMap.get(agent.getTitleCache());
|
293
|
if (result == null){
|
294
|
putAgentBase(agent.getTitleCache(), agent);
|
295
|
result = CdmBase.deproxy(agent, Institution.class);
|
296
|
}
|
297
|
return result;
|
298
|
}
|
299
|
}
|
300
|
|
301
|
|
302
|
/**
|
303
|
* @param state
|
304
|
*
|
305
|
*/
|
306
|
@SuppressWarnings("rawtypes")
|
307
|
private void initAgentMap(STATE state) {
|
308
|
if (!agentMapIsInitialized && repository != null){
|
309
|
List<String> propertyPaths = Arrays.asList("");
|
310
|
List<AgentBase> existingAgents = repository.getAgentService().list(null, null, null, null, propertyPaths);
|
311
|
for (AgentBase agent : existingAgents){
|
312
|
putAgentBase(agent.getTitleCache(), agent);
|
313
|
}
|
314
|
agentMapIsInitialized = true;
|
315
|
}
|
316
|
}
|
317
|
|
318
|
/**
|
319
|
* @param state
|
320
|
* @param author
|
321
|
*/
|
322
|
private void handleTeam(STATE state, Team team) {
|
323
|
List<Person> members = team.getTeamMembers();
|
324
|
for (int i =0; i< members.size(); i++){
|
325
|
Person person = members.get(i);
|
326
|
Person existingPerson = getPerson(person.getTitleCache());
|
327
|
if (existingPerson != null){
|
328
|
members.set(i, existingPerson);
|
329
|
}else{
|
330
|
putAgentBase(person.getTitleCache(), person);
|
331
|
}
|
332
|
}
|
333
|
}
|
334
|
|
335
|
/**
|
336
|
* @param state
|
337
|
* @param nomRef
|
338
|
*/
|
339
|
public Reference getExistingReference(STATE state, Reference ref) {
|
340
|
if (ref == null){
|
341
|
return null;
|
342
|
}else{
|
343
|
initRerenceMap(state);
|
344
|
Reference result = getMatchingReference(ref).orElse(null);
|
345
|
if (result == null){
|
346
|
result = ref;
|
347
|
Reference inRef = result.getInReference();
|
348
|
if (inRef != null){
|
349
|
result.setInReference(getExistingReference(state, result.getInReference()));
|
350
|
}
|
351
|
putReference(result.getTitleCache(), result);
|
352
|
}else{
|
353
|
if(logger.isDebugEnabled()) {
|
354
|
logger.debug("Matches");
|
355
|
}
|
356
|
}
|
357
|
return result;
|
358
|
}
|
359
|
}
|
360
|
|
361
|
/**
|
362
|
* @param state
|
363
|
*/
|
364
|
private void initRerenceMap(STATE state) {
|
365
|
if (!referenceMapIsInitialized && repository != null){
|
366
|
List<String> propertyPaths = Arrays.asList("");
|
367
|
List<Reference> existingReferences = repository.getReferenceService().list(null, null, null, null, propertyPaths);
|
368
|
for (Reference ref : existingReferences){
|
369
|
putReference(ref.getTitleCache(), ref);
|
370
|
}
|
371
|
referenceMapIsInitialized = true;
|
372
|
}
|
373
|
}
|
374
|
|
375
|
/**
|
376
|
* @param state
|
377
|
* @param name
|
378
|
*/
|
379
|
public <NAME extends INonViralName> NAME getExistingName(STATE state, NAME name) {
|
380
|
if (name == null){
|
381
|
return null;
|
382
|
}else{
|
383
|
initNameMap(state);
|
384
|
@SuppressWarnings("unchecked")
|
385
|
NAME result = (NAME)getMatchingName(name).orElse(null);
|
386
|
if (result == null){
|
387
|
result = name;
|
388
|
Set<HybridRelationship> parentRelations = result.getHybridChildRelations();
|
389
|
for (HybridRelationship rel : parentRelations){
|
390
|
INonViralName parent = rel.getParentName();
|
391
|
if (parent != null){
|
392
|
rel.setParentName(getExistingName(state, parent));
|
393
|
}
|
394
|
}
|
395
|
putName(result.getTitleCache(), result);
|
396
|
}else{
|
397
|
if(logger.isDebugEnabled()) {
|
398
|
logger.debug("Matches");
|
399
|
}
|
400
|
}
|
401
|
return result;
|
402
|
}
|
403
|
}
|
404
|
|
405
|
/**
|
406
|
* @param state
|
407
|
*/
|
408
|
private void initNameMap(STATE state) {
|
409
|
if (!nameMapIsInitialized && repository != null){
|
410
|
List<String> propertyPaths = Arrays.asList("");
|
411
|
List<TaxonName> existingNames = repository.getNameService().list(null, null, null, null, propertyPaths);
|
412
|
for (TaxonName name : existingNames){
|
413
|
putName(name.getTitleCache(), name);
|
414
|
}
|
415
|
nameMapIsInitialized = true;
|
416
|
}
|
417
|
}
|
418
|
|
419
|
public Rights getExistingCopyright(STATE state,
|
420
|
Rights right) {
|
421
|
if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){
|
422
|
return null;
|
423
|
}else{
|
424
|
initCopyrightMap(state);
|
425
|
String key = makeCopyrightKey(right);
|
426
|
Set<Rights> set = copyrightMap.get(key);
|
427
|
if (set == null || set.isEmpty()){
|
428
|
putCopyright(key, right);
|
429
|
return right;
|
430
|
}else if (set.size()>1){
|
431
|
//TODO
|
432
|
logger.warn("More than 1 matching copyright not yet handled for key: " + key);
|
433
|
}
|
434
|
return set.iterator().next();
|
435
|
}
|
436
|
}
|
437
|
|
438
|
/**
|
439
|
* @param state
|
440
|
*/
|
441
|
private void initCopyrightMap(STATE state) {
|
442
|
if (!copyrightMapIsInitialized && repository != null){
|
443
|
List<String> propertyPaths = Arrays.asList("");
|
444
|
List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths);
|
445
|
for (Rights right : existingRights){
|
446
|
if (RightsType.COPYRIGHT().equals(right.getType())){
|
447
|
putCopyright(makeCopyrightKey(right), right);
|
448
|
}
|
449
|
}
|
450
|
copyrightMapIsInitialized = true;
|
451
|
}
|
452
|
|
453
|
}
|
454
|
|
455
|
/**
|
456
|
* @param makeCopyrightKey
|
457
|
* @param right
|
458
|
*/
|
459
|
private void putCopyright(String key, Rights right) {
|
460
|
Set<Rights> rights = copyrightMap.get(key);
|
461
|
if (rights == null){
|
462
|
rights = new HashSet<>();
|
463
|
copyrightMap.put(key, rights);
|
464
|
}
|
465
|
rights.add(right);
|
466
|
|
467
|
}
|
468
|
|
469
|
/**
|
470
|
* @param right
|
471
|
* @return
|
472
|
*/
|
473
|
private String makeCopyrightKey(Rights right) {
|
474
|
if (right.getAgent() != null){
|
475
|
return right.getAgent().getTitleCache();
|
476
|
}else if (right.getText() != null){
|
477
|
return right.getText();
|
478
|
}else {
|
479
|
logger.warn("Key for copyright could not be created: " + right);
|
480
|
return right.getUuid().toString();
|
481
|
}
|
482
|
}
|
483
|
|
484
|
|
485
|
}
|