Project

General

Profile

Download (16.3 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.common.utils;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Optional;
17
import java.util.Set;
18
import java.util.function.Predicate;
19

    
20
import org.apache.log4j.Logger;
21

    
22
import eu.etaxonomy.cdm.api.application.ICdmRepository;
23
import eu.etaxonomy.cdm.api.service.IService;
24
import eu.etaxonomy.cdm.io.common.ImportResult;
25
import eu.etaxonomy.cdm.io.common.ImportStateBase;
26
import eu.etaxonomy.cdm.model.agent.AgentBase;
27
import eu.etaxonomy.cdm.model.agent.Institution;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.CdmBase;
32
import eu.etaxonomy.cdm.model.common.ICdmBase;
33
import eu.etaxonomy.cdm.model.media.Rights;
34
import eu.etaxonomy.cdm.model.media.RightsType;
35
import eu.etaxonomy.cdm.model.name.HybridRelationship;
36
import eu.etaxonomy.cdm.model.name.INonViralName;
37
import eu.etaxonomy.cdm.model.name.TaxonName;
38
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
39
import eu.etaxonomy.cdm.model.reference.Reference;
40
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
41
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
42
import eu.etaxonomy.cdm.strategy.match.MatchException;
43

    
44
/**
45
 * Helper class for deduplicating authors, references, names, etc.
46
 * during import.
47
 * @author a.mueller
48
 * @date 11.02.2017
49
 *
50
 */
51
public class ImportDeduplicationHelper<STATE extends ImportStateBase<?,?>> {
52
    private static final Logger logger = Logger.getLogger(ImportDeduplicationHelper.class);
53

    
54
    private ICdmRepository repository;
55

    
56
    boolean referenceMapIsInitialized = false;
57
    boolean nameMapIsInitialized = false;
58
    boolean agentMapIsInitialized = false;
59
    boolean copyrightMapIsInitialized = false;
60

    
61
    private Map<String, Set<Reference>> refMap = new HashMap<>();
62
    private Map<String, Team> teamMap = new HashMap<>();
63
    private Map<String, Person> personMap = new HashMap<>();
64
    private Map<String, Institution> institutionMap = new HashMap<>();
65
    //using titleCache
66
    private Map<String, Set<INonViralName>> nameMap = new HashMap<>();
67
    private Map<String, Set<Rights>> copyrightMap = new HashMap<>();
68

    
69

    
70
    private IMatchStrategy referenceMatcher = DefaultMatchStrategy.NewInstance(Reference.class);
71
    private IMatchStrategy nameMatcher = DefaultMatchStrategy.NewInstance(TaxonName.class);
72

    
73

    
74

    
75
    public void restartSession(){
76
        restartSession(repository, null);
77
    }
78

    
79
    public void restartSession(ICdmRepository repository, ImportResult importResult){
80
        if (repository == null){
81
            return;
82
        }
83
        personMap = refreshMap(personMap, (IService)repository.getAgentService(), importResult);
84
        teamMap = refreshMap(teamMap, (IService)repository.getAgentService(), importResult);
85
        institutionMap = refreshMap(institutionMap, (IService)repository.getAgentService(), importResult);
86
    }
87

    
88

    
89
    /**
90
     * @param oldMap
91
     * @param service
92
     * @return
93
     */
94
    private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap,
95
            IService<T> service, ImportResult importResult) {
96
        Map<String, T> newMap = new HashMap<>();
97
        for (String key : oldMap.keySet()){
98
            T old = oldMap.get(key);
99
            if (old!= null){
100
                T cdmBase = service.find(old.getUuid());
101
                if (cdmBase == null){
102
                    String message = "No cdm object was found for uuid " + old.getUuid() + " of class " + old.getClass().getSimpleName();
103
                    importResult.addWarning(message);
104
                }else{
105
                    newMap.put(key, cdmBase);
106
                }
107
            }else{
108
                String message = "Value for key " +  key + " was null in deduplication map";
109
                importResult.addWarning(message);
110
            }
111
        }
112
        return newMap;
113
    }
114

    
115
// ************************** FACTORY *******************************/
116

    
117
    public static ImportDeduplicationHelper<?> NewInstance(ICdmRepository repository){
118
        return new ImportDeduplicationHelper<>(repository);
119
    }
120

    
121
    public static ImportDeduplicationHelper<?> NewStandaloneInstance(){
122
        return new ImportDeduplicationHelper<>(null);
123
    }
124

    
125
    /**
126
     * @param repository
127
     * @param state not used, only for correct casting of generics
128
     * @return
129
     */
130
    public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper<STATE> NewInstance(ICdmRepository repository, STATE state){
131
        return new ImportDeduplicationHelper<>(repository);
132
    }
133

    
134
// ************************ CONSTRUCTOR *****************************/
135

    
136
    /**
     * Stores the given repository. A warning is logged if it is
     * <code>null</code>.
     *
     * @param repository the repository to use, may be <code>null</code>
     */
    public ImportDeduplicationHelper(ICdmRepository repository) {
        if (repository == null){
            logger.warn("Repository is null. Deduplication does not work against database");
        }
        this.repository = repository;
    }
142

    
143
//************************ PUTTER / GETTER *****************************/
144

    
145
    //REFERENCES
146
    private void putReference(String title, Reference ref){
147
        Set<Reference> refs = refMap.get(title);
148
        if (refs == null){
149
            refs = new HashSet<>();
150
            refMap.put(title, refs);
151
        }
152
        refs.add(ref);
153
    }
154
    private Set<Reference> getReferences(String title){
155
        return refMap.get(title);
156
    }
157

    
158
    private Optional<Reference> getMatchingReference(Reference existing){
159
        Predicate<Reference> matchFilter = reference ->{
160
            try {
161
                return referenceMatcher.invoke(reference, existing);
162
            } catch (MatchException e) {
163
                throw new RuntimeException(e);
164
            }
165
        };
166
        return Optional.ofNullable(getReferences(existing.getTitleCache()))
167
                .orElse(new HashSet<>())
168
                .stream()
169
                .filter(matchFilter)
170
                .findAny();
171
    }
172

    
173
    // AGENTS
174
    private void putAgentBase(String title, AgentBase<?> agent){
175
        if (agent.isInstanceOf(Person.class) ){
176
            personMap.put(title, CdmBase.deproxy(agent, Person.class));
177
        }else if (agent.isInstanceOf(Team.class)){
178
            teamMap.put(title, CdmBase.deproxy(agent, Team.class));
179
        }else{
180
            institutionMap.put(title, CdmBase.deproxy(agent, Institution.class));
181
        }
182
    }
183

    
184
    private TeamOrPersonBase<?> getAgentBase(String title){
185
        TeamOrPersonBase<?> result = personMap.get(title);
186
        if (result == null){
187
            result = teamMap.get(title);
188
        }
189
        return result;
190
    }
191

    
192
    private Person getPerson(String title){
193
        return personMap.get(title);
194
    }
195

    
196
    //NAMES
197
    private void putName(String title, INonViralName name){
198
        Set<INonViralName> names = nameMap.get(title);
199
        if (names == null){
200
            names = new HashSet<>();
201
            nameMap.put(title, names);
202
        }
203
        names.add(name);
204
    }
205
    private Set<INonViralName> getNames(String title){
206
        return nameMap.get(title);
207
    }
208

    
209
    private Optional<INonViralName> getMatchingName(INonViralName existing){
210
        Predicate<INonViralName> matchFilter = name ->{
211
            try {
212
                return nameMatcher.invoke(name, existing);
213
            } catch (MatchException e) {
214
                throw new RuntimeException(e);
215
            }
216
        };
217
        return Optional.ofNullable(getNames(existing.getTitleCache()))
218
                .orElse(new HashSet<>())
219
                .stream()
220
                .filter(matchFilter)
221
                .findAny();
222
    }
223

    
224
// **************************** METHODS *****************************/
225

    
226
    /**
227
     * This method replaces name authors, nomenclatural reference and
228
     * nomenclatural reference author by existing authors and references
229
     * if matching authors or references exist. If not, the given authors
230
     * and references are added to the map of existing entities.
231
     *
232
     * @param state the import state
233
     * @param name the name with authors and references to replace
234
     */
235
    public void replaceAuthorNamesAndNomRef(STATE state,
236
            INonViralName name) {
237
        TeamOrPersonBase<?> combAuthor = name.getCombinationAuthorship();
238
        name.setCombinationAuthorship(getExistingAuthor(state, combAuthor));
239

    
240
        TeamOrPersonBase<?> exAuthor = name.getExCombinationAuthorship();
241
        name.setExCombinationAuthorship(getExistingAuthor(state, exAuthor));
242

    
243
        TeamOrPersonBase<?> basioAuthor = name.getBasionymAuthorship();
244
        name.setBasionymAuthorship(getExistingAuthor(state, basioAuthor));
245

    
246
        TeamOrPersonBase<?> exBasioAuthor = name.getExBasionymAuthorship();
247
        name.setExBasionymAuthorship(getExistingAuthor(state, exBasioAuthor));
248

    
249
        INomenclaturalReference nomRef = name.getNomenclaturalReference();
250
        if (nomRef != null){
251
            TeamOrPersonBase<?> refAuthor = nomRef.getAuthorship();
252
            nomRef.setAuthorship(getExistingAuthor(state, refAuthor));
253

    
254
            Reference existingRef = getExistingReference(state, (Reference)nomRef);
255
            if (existingRef != null){
256
                name.setNomenclaturalReference(existingRef);
257
            }
258
        }
259
    }
260

    
261
    /**
262
     * @param state
263
     * @param combAuthor
264
     * @return
265
     */
266
    public TeamOrPersonBase<?> getExistingAuthor(STATE state,
267
            TeamOrPersonBase<?> author) {
268
        if (author == null){
269
            return null;
270
        }else{
271
            initAgentMap(state);
272
            TeamOrPersonBase<?> result = getAgentBase(author.getTitleCache());
273
            if (result == null){
274
                putAgentBase(author.getTitleCache(), author);
275
                if (author instanceof Team){
276
                    handleTeam(state, (Team)author);
277
                }
278
                result = author;
279
            }
280
            return result;
281
        }
282
    }
283

    
284
    public AgentBase<?> getExistingAgent(STATE state,
285
            AgentBase<?> agent) {
286
        if (agent == null){
287
            return null;
288
        } else if (agent.isInstanceOf(TeamOrPersonBase.class)){
289
            return getExistingAuthor(state, CdmBase.deproxy(agent, TeamOrPersonBase.class));
290
        }else{
291
            initAgentMap(state);
292
            Institution result = institutionMap.get(agent.getTitleCache());
293
            if (result == null){
294
                putAgentBase(agent.getTitleCache(), agent);
295
                result = CdmBase.deproxy(agent, Institution.class);
296
            }
297
            return result;
298
        }
299
    }
300

    
301

    
302
    /**
303
     * @param state
304
     *
305
     */
306
    @SuppressWarnings("rawtypes")
307
    private void initAgentMap(STATE state) {
308
        if (!agentMapIsInitialized && repository != null){
309
            List<String> propertyPaths = Arrays.asList("");
310
            List<AgentBase> existingAgents = repository.getAgentService().list(null, null, null, null, propertyPaths);
311
            for (AgentBase agent : existingAgents){
312
                putAgentBase(agent.getTitleCache(), agent);
313
            }
314
            agentMapIsInitialized = true;
315
        }
316
    }
317

    
318
    /**
319
     * @param state
320
     * @param author
321
     */
322
    private void handleTeam(STATE state, Team team) {
323
        List<Person> members = team.getTeamMembers();
324
        for (int i =0; i< members.size(); i++){
325
            Person person = members.get(i);
326
            Person existingPerson = getPerson(person.getTitleCache());
327
            if (existingPerson != null){
328
                members.set(i, existingPerson);
329
            }else{
330
                putAgentBase(person.getTitleCache(), person);
331
            }
332
        }
333
    }
334

    
335
    /**
336
    * @param state
337
    * @param nomRef
338
    */
339
   public Reference getExistingReference(STATE state, Reference ref) {
340
       if (ref == null){
341
           return null;
342
       }else{
343
           initRerenceMap(state);
344
           Reference result = getMatchingReference(ref).orElse(null);
345
           if (result == null){
346
               result = ref;
347
               Reference inRef = result.getInReference();
348
               if (inRef != null){
349
                   result.setInReference(getExistingReference(state, result.getInReference()));
350
               }
351
               putReference(result.getTitleCache(), result);
352
           }else{
353
               if(logger.isDebugEnabled()) {
354
                   logger.debug("Matches");
355
                }
356
           }
357
           return result;
358
       }
359
   }
360

    
361
   /**
362
    * @param state
363
    */
364
   private void initRerenceMap(STATE state) {
365
       if (!referenceMapIsInitialized && repository != null){
366
           List<String> propertyPaths = Arrays.asList("");
367
           List<Reference> existingReferences = repository.getReferenceService().list(null, null, null, null, propertyPaths);
368
           for (Reference ref : existingReferences){
369
               putReference(ref.getTitleCache(), ref);
370
           }
371
           referenceMapIsInitialized = true;
372
       }
373
   }
374

    
375
   /**
376
    * @param state
377
    * @param name
378
    */
379
   public <NAME extends INonViralName> NAME getExistingName(STATE state, NAME name) {
380
       if (name == null){
381
           return null;
382
       }else{
383
           initNameMap(state);
384
           @SuppressWarnings("unchecked")
385
           NAME result = (NAME)getMatchingName(name).orElse(null);
386
           if (result == null){
387
               result = name;
388
               Set<HybridRelationship> parentRelations = result.getHybridChildRelations();
389
               for (HybridRelationship rel : parentRelations){
390
                   INonViralName parent = rel.getParentName();
391
                   if (parent != null){
392
                       rel.setParentName(getExistingName(state, parent));
393
                   }
394
               }
395
               putName(result.getTitleCache(), result);
396
           }else{
397
               if(logger.isDebugEnabled()) {
398
                   logger.debug("Matches");
399
                }
400
           }
401
           return result;
402
       }
403
   }
404

    
405
   /**
406
    * @param state
407
    */
408
   private void initNameMap(STATE state) {
409
       if (!nameMapIsInitialized && repository != null){
410
           List<String> propertyPaths = Arrays.asList("");
411
           List<TaxonName> existingNames = repository.getNameService().list(null, null, null, null, propertyPaths);
412
           for (TaxonName name : existingNames){
413
               putName(name.getTitleCache(), name);
414
           }
415
          nameMapIsInitialized = true;
416
       }
417
   }
418

    
419
   public Rights getExistingCopyright(STATE state,
420
           Rights right) {
421
       if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){
422
           return null;
423
       }else{
424
           initCopyrightMap(state);
425
           String key = makeCopyrightKey(right);
426
           Set<Rights> set = copyrightMap.get(key);
427
           if (set == null || set.isEmpty()){
428
               putCopyright(key, right);
429
               return right;
430
           }else if (set.size()>1){
431
               //TODO
432
               logger.warn("More than 1 matching copyright not yet handled for key: " + key);
433
           }
434
           return set.iterator().next();
435
       }
436
   }
437

    
438
    /**
439
     * @param state
440
     */
441
    private void initCopyrightMap(STATE state) {
442
        if (!copyrightMapIsInitialized && repository != null){
443
            List<String> propertyPaths = Arrays.asList("");
444
            List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths);
445
            for (Rights right : existingRights){
446
                if (RightsType.COPYRIGHT().equals(right.getType())){
447
                    putCopyright(makeCopyrightKey(right), right);
448
                }
449
            }
450
            copyrightMapIsInitialized = true;
451
        }
452

    
453
    }
454

    
455
    /**
456
     * @param makeCopyrightKey
457
     * @param right
458
     */
459
    private void putCopyright(String key, Rights right) {
460
        Set<Rights> rights = copyrightMap.get(key);
461
        if (rights == null){
462
            rights = new HashSet<>();
463
            copyrightMap.put(key, rights);
464
        }
465
        rights.add(right);
466

    
467
    }
468

    
469
    /**
470
     * @param right
471
     * @return
472
     */
473
    private String makeCopyrightKey(Rights right) {
474
        if (right.getAgent() != null){
475
            return right.getAgent().getTitleCache();
476
        }else if (right.getText() != null){
477
            return right.getText();
478
        }else {
479
            logger.warn("Key for copyright could not be created: " + right);
480
            return right.getUuid().toString();
481
        }
482
    }
483

    
484

    
485
}
    (1-1/1)