Project

General

Profile

Download (22.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.common.utils;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Optional;
17
import java.util.Set;
18
import java.util.function.Predicate;
19

    
20
import org.apache.log4j.Logger;
21

    
22
import eu.etaxonomy.cdm.api.application.ICdmRepository;
23
import eu.etaxonomy.cdm.api.service.IService;
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.ImportResult;
26
import eu.etaxonomy.cdm.io.common.ImportStateBase;
27
import eu.etaxonomy.cdm.model.agent.AgentBase;
28
import eu.etaxonomy.cdm.model.agent.Institution;
29
import eu.etaxonomy.cdm.model.agent.Person;
30
import eu.etaxonomy.cdm.model.agent.Team;
31
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32
import eu.etaxonomy.cdm.model.common.CdmBase;
33
import eu.etaxonomy.cdm.model.common.ICdmBase;
34
import eu.etaxonomy.cdm.model.media.Rights;
35
import eu.etaxonomy.cdm.model.media.RightsType;
36
import eu.etaxonomy.cdm.model.name.HybridRelationship;
37
import eu.etaxonomy.cdm.model.name.INonViralName;
38
import eu.etaxonomy.cdm.model.name.TaxonName;
39
import eu.etaxonomy.cdm.model.occurrence.Collection;
40
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
41
import eu.etaxonomy.cdm.model.reference.Reference;
42
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
43
import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
44
import eu.etaxonomy.cdm.strategy.match.MatchException;
45

    
46
/**
47
 * Helper class for deduplicating authors, references, names, etc.
48
 * during import.
49
 * @author a.mueller
50
 * @date 11.02.2017
51
 *
52
 */
53
public class ImportDeduplicationHelper<STATE extends ImportStateBase<?,?>> {
54
    private static final Logger logger = Logger.getLogger(ImportDeduplicationHelper.class);
55

    
56
    private ICdmRepository repository;
57

    
58
    boolean referenceMapIsInitialized = false;
59
    boolean nameMapIsInitialized = false;
60
    boolean agentMapIsInitialized = false;
61
    boolean copyrightMapIsInitialized = false;
62
    boolean collectionMapIsInitialized = false;
63

    
64

    
65
    private Map<String, Set<Reference>> refMap = new HashMap<>();
66
    private Map<String, Set<Team>> teamMap = new HashMap<>();
67
    private Map<String, Set<Person>> personMap = new HashMap<>();
68
    private Map<String, Institution> institutionMap = new HashMap<>();
69
    //using titleCache
70
    private Map<String, Set<INonViralName>> nameMap = new HashMap<>();
71
    private Map<String, Set<Rights>> copyrightMap = new HashMap<>();
72
    private Map<String, Set<Collection>> collectionMap = new HashMap<>();
73

    
74

    
75
    private IMatchStrategy referenceMatcher = DefaultMatchStrategy.NewInstance(Reference.class);
76
//    private IMatchStrategy collectionMatcher = DefaultMatchStrategy.NewInstance(Collection.class);
77
    private IMatchStrategy nameMatcher = DefaultMatchStrategy.NewInstance(TaxonName.class);
78
    private IMatchStrategy personMatcher = DefaultMatchStrategy.NewInstance(Person.class);
79
    private IMatchStrategy teamMatcher = DefaultMatchStrategy.NewInstance(Team.class);
80

    
81

    
82

    
83

    
84
    public void restartSession(){
85
        restartSession(repository, null);
86
    }
87

    
88
    public void restartSession(ICdmRepository repository, ImportResult importResult){
89
        if (repository == null){
90
            return;
91
        }
92
        refMap = refreshSetMap(refMap, (IService)repository.getReferenceService(), importResult);
93
        personMap = refreshSetMap(personMap, (IService)repository.getAgentService(), importResult);
94
        teamMap = refreshSetMap(teamMap, (IService)repository.getAgentService(), importResult);
95
        institutionMap = refreshMap(institutionMap, (IService)repository.getAgentService(), importResult);
96

    
97
        nameMap = refreshSetMap(nameMap, (IService)repository.getNameService(), importResult);
98
        collectionMap = refreshSetMap(collectionMap, (IService)repository.getCollectionService(), importResult);
99
        //TODO copyright ?
100
    }
101

    
102

    
103
    /**
104
     * @param oldMap
105
     * @param service
106
     * @return
107
     */
108
    private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap,
109
            IService<T> service, ImportResult importResult) {
110
        Map<String, T> newMap = new HashMap<>();
111
        for (String key : oldMap.keySet()){
112
            T old = oldMap.get(key);
113
            if (old!= null){
114
                T cdmBase = service.find(old.getUuid());
115
                if (cdmBase == null){
116
                    String message = "No cdm object was found for uuid " + old.getUuid() + " of class " + old.getClass().getSimpleName();
117
                    importResult.addWarning(message);
118
                }else{
119
                    newMap.put(key, cdmBase);
120
                }
121
            }else{
122
                String message = "Value for key " +  key + " was null in deduplication map";
123
                importResult.addWarning(message);
124
            }
125
        }
126
        return newMap;
127
    }
128

    
129
    private <T extends ICdmBase> Map<String, Set<T>> refreshSetMap(Map<String, Set<T>> oldMap,
130
            IService<T> service, ImportResult importResult) {
131
        Map<String, Set<T>> newMap = new HashMap<>();
132
        for (String key : oldMap.keySet()){
133
            Set<T> oldSet = oldMap.get(key);
134
            Set<T> newSet = new HashSet<>();
135
            if (oldSet != null){
136
                newMap.put(key, newSet);
137
                for (T item : oldSet){
138
                    T cdmBase = service.find(item.getUuid());
139
                    if (cdmBase == null){
140
                        String message = "No cdm object was found for uuid " + item.getUuid() + " of class " + item.getClass().getSimpleName();
141
                        importResult.addWarning(message);
142
                    }else{
143
                        newSet.add(cdmBase);
144
                    }
145
                }
146
            }else{
147
                String message = "Value for key " +  key + " was null in deduplication map";
148
                importResult.addWarning(message);
149
            }
150
        }
151
        return newMap;
152
    }
153

    
154
// ************************** FACTORY *******************************/
155

    
156
    public static ImportDeduplicationHelper<?> NewInstance(ICdmRepository repository){
157
        return new ImportDeduplicationHelper<>(repository);
158
    }
159

    
160
    public static ImportDeduplicationHelper<?> NewStandaloneInstance(){
161
        return new ImportDeduplicationHelper<>(null);
162
    }
163

    
164
    /**
165
     * @param repository
166
     * @param state not used, only for correct casting of generics
167
     * @return
168
     */
169
    public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper<STATE> NewInstance(ICdmRepository repository, STATE state){
170
        return new ImportDeduplicationHelper<>(repository);
171
    }
172

    
173
// ************************ CONSTRUCTOR *****************************/
174

    
175
    /**
     * Creates a helper for the given repository. A warning is logged
     * if the repository is <code>null</code>.
     *
     * @param repository the repository to use, may be <code>null</code>
     */
    public ImportDeduplicationHelper(ICdmRepository repository) {
        if (repository == null){
            logger.warn("Repository is null. Deduplication does not work against database");
        }
        this.repository = repository;
    }
181

    
182
//************************ PUTTER / GETTER *****************************/
183

    
184
    //REFERENCES
185
    private void putReference(String title, Reference ref){
186
        Set<Reference> refs = refMap.get(title);
187
        if (refs == null){
188
            refs = new HashSet<>();
189
            refMap.put(title, refs);
190
        }
191
        refs.add(ref);
192
    }
193
    private Set<Reference> getReferences(String title){
194
        return refMap.get(title);
195
    }
196

    
197
    private Optional<Reference> getMatchingReference(Reference existing){
198
        Predicate<Reference> matchFilter = reference ->{
199
            try {
200
                return referenceMatcher.invoke(reference, existing);
201
            } catch (MatchException e) {
202
                throw new RuntimeException(e);
203
            }
204
        };
205
        return Optional.ofNullable(getReferences(existing.getTitleCache()))
206
                .orElse(new HashSet<>())
207
                .stream()
208
                .filter(matchFilter)
209
                .findAny();
210
    }
211

    
212
    // AGENTS
213
    private void putAgentBase(String title, AgentBase<?> agent){
214
        if (agent.isInstanceOf(Person.class) ){
215
            putAgent(title, CdmBase.deproxy(agent, Person.class), personMap);
216
        }else if (agent.isInstanceOf(Team.class)){
217
            putAgent(title, CdmBase.deproxy(agent, Team.class), teamMap);
218
        }else{
219
//            putAgent(title, CdmBase.deproxy(agent, Institution.class), institutionMap);
220
            institutionMap.put(title, CdmBase.deproxy(agent, Institution.class));
221
        }
222
    }
223
    //put agent
224
    private <T extends AgentBase> void putAgent(String title, T agent, Map<String, Set<T>> map){
225
        Set<T> items = map.get(title);
226
        if (items == null){
227
            items = new HashSet<>();
228
            map.put(title, items);
229
        }
230
        items.add(agent);
231
    }
232
//
233
//    private TeamOrPersonBase<?> getTeamOrPerson(TeamOrPersonBase<?> agent){
234
//        TeamOrPersonBase<?> result = getMatchingPerson(agent) ; // personMap.get(title);
235
//        if (result == null){
236
//            result = teamMap.get(title);
237
//        }
238
//        return result;
239
//    }
240

    
241
    private Optional<Person> getMatchingPerson(Person existing){
242
        Predicate<Person> matchFilter = person ->{
243
            try {
244
                return personMatcher.invoke(person, existing);
245
            } catch (MatchException e) {
246
                throw new RuntimeException(e);
247
            }
248
        };
249
        return Optional.ofNullable(getPersons(existing.getTitleCache()))
250
                .orElse(new HashSet<>())
251
                .stream()
252
                .filter(matchFilter)
253
                .findAny();
254
    }
255
    private TeamOrPersonBase<?> getTeamOrPerson(TeamOrPersonBase<?> agent){
256
        TeamOrPersonBase<?> result = agent;
257
        if (agent.isInstanceOf(Person.class)){
258
            result = getMatchingPerson(CdmBase.deproxy(agent, Person.class)).orElse(null) ; // personMap.get(title);
259
        }else if (agent.isInstanceOf(Team.class)) {
260
            result = getMatchingTeam(CdmBase.deproxy(agent, Team.class)).orElse(null); // teamMap.get(title);
261
        }
262
        return result;
263
    }
264

    
265
    private Optional<Team> getMatchingTeam(Team existing){
266
        Predicate<Team> matchFilter = person ->{
267
            try {
268
                return teamMatcher.invoke(person, existing);
269
            } catch (MatchException e) {
270
                throw new RuntimeException(e);
271
            }
272
        };
273
        return Optional.ofNullable(getTeam(existing.getTitleCache()))
274
                .orElse(new HashSet<>())
275
                .stream()
276
                .filter(matchFilter)
277
                .findAny();
278
    }
279
    private Set<Person> getPersons(String title){
280
        return personMap.get(title);
281
    }
282
    private Set<Team> getTeam(String title){
283
        return teamMap.get(title);
284
    }
285

    
286
    //NAMES
287
    private void putName(String title, INonViralName name){
288
        Set<INonViralName> names = nameMap.get(title);
289
        if (names == null){
290
            names = new HashSet<>();
291
            nameMap.put(title, names);
292
        }
293
        names.add(name);
294
    }
295
    private Set<INonViralName> getNames(String title){
296
        return nameMap.get(title);
297
    }
298

    
299
    private Optional<INonViralName> getMatchingName(INonViralName existing){
300
        Predicate<INonViralName> matchFilter = name ->{
301
            try {
302
                return nameMatcher.invoke(name, existing);
303
            } catch (MatchException e) {
304
                throw new RuntimeException(e);
305
            }
306
        };
307
        return Optional.ofNullable(getNames(existing.getTitleCache()))
308
                .orElse(new HashSet<>())
309
                .stream()
310
                .filter(matchFilter)
311
                .findAny();
312
    }
313

    
314
    //COLLECTIONS
315
    private void putCollection(String title, Collection collection){
316
        Set<Collection> collections = collectionMap.get(title);
317
        if (collections == null){
318
            collections = new HashSet<>();
319
            collectionMap.put(title, collections);
320
        }
321
        collections.add(collection);
322
    }
323

    
324
    private Set<Collection> getCollections(String title){
325
        return collectionMap.get(title);
326
    }
327

    
328
    private Optional<Collection> getMatchingCollections(Collection existing){
329
        Predicate<Collection> matchFilter = collection ->{
330
//            try {
331
                //TODO right Collection matching
332
                if (CdmUtils.nullSafeEqual(collection.getName(), existing.getName())
333
                        && CdmUtils.nullSafeEqual(collection.getCode(), existing.getCode())){
334
                    return true;
335
                }else{
336
                    return false;
337
                }
338
//                return collectionMatcher.invoke(collection, existing);
339
//            } catch (MatchException e) {
340
//                throw new RuntimeException(e);
341
//            }
342
        };
343
        return Optional.ofNullable(getCollections(existing.getTitleCache()))
344
                .orElse(new HashSet<>())
345
                .stream()
346
                .filter(matchFilter)
347
                .findAny();
348
    }
349

    
350
// **************************** METHODS *****************************/
351

    
352
    /**
353
     * This method replaces name authors, nomenclatural reference and
354
     * nomenclatural reference author by existing authors and references
355
     * if matching authors or references exist. If not, the given authors
356
     * and references are added to the map of existing entities.
357
     *
358
     * @param state the import state
359
     * @param name the name with authors and references to replace
360
     */
361
    public void replaceAuthorNamesAndNomRef(STATE state,
362
            INonViralName name) {
363
        TeamOrPersonBase<?> combAuthor = name.getCombinationAuthorship();
364
        name.setCombinationAuthorship(getExistingAuthor(state, combAuthor));
365

    
366
        TeamOrPersonBase<?> exAuthor = name.getExCombinationAuthorship();
367
        name.setExCombinationAuthorship(getExistingAuthor(state, exAuthor));
368

    
369
        TeamOrPersonBase<?> basioAuthor = name.getBasionymAuthorship();
370
        name.setBasionymAuthorship(getExistingAuthor(state, basioAuthor));
371

    
372
        TeamOrPersonBase<?> exBasioAuthor = name.getExBasionymAuthorship();
373
        name.setExBasionymAuthorship(getExistingAuthor(state, exBasioAuthor));
374

    
375
        INomenclaturalReference nomRef = name.getNomenclaturalReference();
376
        if (nomRef != null){
377
            TeamOrPersonBase<?> refAuthor = nomRef.getAuthorship();
378
            nomRef.setAuthorship(getExistingAuthor(state, refAuthor));
379

    
380
            Reference existingRef = getExistingReference(state, (Reference)nomRef);
381
            if (existingRef != null){
382
                name.setNomenclaturalReference(existingRef);
383
            }
384
        }
385
    }
386

    
387
    /**
388
     * @param state
389
     * @param combAuthor
390
     * @return
391
     */
392
    public TeamOrPersonBase<?> getExistingAuthor(STATE state,
393
            TeamOrPersonBase<?> author) {
394
        if (author == null){
395
            return null;
396
        }else{
397
            initAgentMap(state);
398
            TeamOrPersonBase<?> result = getTeamOrPerson(author);
399
            if (result == null){
400
                putAgentBase(author.getTitleCache(), author);
401
                if (author instanceof Team){
402
                    handleTeam(state, (Team)author);
403
                }
404
                result = author;
405
            }
406
            return result;
407
        }
408
    }
409

    
410
    public AgentBase<?> getExistingAgent(STATE state,
411
            AgentBase<?> agent) {
412
        if (agent == null){
413
            return null;
414
        } else if (agent.isInstanceOf(TeamOrPersonBase.class)){
415
            return getExistingAuthor(state, CdmBase.deproxy(agent, TeamOrPersonBase.class));
416
        }else{
417
            initAgentMap(state);
418
            Institution result = institutionMap.get(agent.getTitleCache());
419
            if (result == null){
420
                putAgentBase(agent.getTitleCache(), agent);
421
                result = CdmBase.deproxy(agent, Institution.class);
422
            }
423
            return result;
424
        }
425
    }
426

    
427

    
428
    /**
429
     * @param state
430
     *
431
     */
432
    @SuppressWarnings("rawtypes")
433
    private void initAgentMap(STATE state) {
434
        if (!agentMapIsInitialized && repository != null){
435
            List<String> propertyPaths = Arrays.asList("");
436
            List<AgentBase> existingAgents = repository.getAgentService().list(null, null, null, null, propertyPaths);
437
            for (AgentBase agent : existingAgents){
438
                putAgentBase(agent.getTitleCache(), agent);
439
            }
440
            agentMapIsInitialized = true;
441
        }
442
    }
443

    
444
    /**
445
     * @param state
446
     * @param author
447
     */
448
    private void handleTeam(STATE state, Team team) {
449
        List<Person> members = team.getTeamMembers();
450
        for (int i =0; i< members.size(); i++){
451
            Person person = members.get(i);
452
            Person existingPerson = getMatchingPerson(person).orElse(null);
453
            if (existingPerson != null){
454
                members.set(i, existingPerson);
455
            }else{
456
                putAgentBase(person.getTitleCache(), person);
457
            }
458
        }
459
    }
460

    
461

    
462
    /**
463
     * @param state
464
     * @param collection
465
     * @return
466
     */
467
    public Collection getExistingCollection(STATE state, Collection collection) {
468
        if (collection == null){
469
            return null;
470
        }else{
471
            initCollectionMap(state);
472
            Collection result = getMatchingCollections(collection).orElse(null);
473
            if (result == null){
474
                result = collection;
475
                putCollection(result.getTitleCache(), result);
476
            }else{
477
                if(logger.isDebugEnabled()) {
478
                    logger.debug("Matches");
479
                 }
480
            }
481
            return result;
482
        }
483
    }
484

    
485
    /**
486
     * @param state
487
     */
488
    private void initCollectionMap(STATE state) {
489
        if (!collectionMapIsInitialized && repository != null){
490
            List<String> propertyPaths = Arrays.asList("");
491
            List<Collection> existingCollections = repository.getCollectionService().list(null, null, null, null, propertyPaths);
492
            for (Collection collection : existingCollections){
493
                putCollection(collection.getTitleCache(), collection);
494
            }
495
            collectionMapIsInitialized = true;
496
        }
497
    }
498

    
499
    /**
500
    * @param state
501
    * @param nomRef
502
    */
503
   public Reference getExistingReference(STATE state, Reference ref) {
504
       if (ref == null){
505
           return null;
506
       }else{
507
           initRerenceMap(state);
508
           Reference result = getMatchingReference(ref).orElse(null);
509
           if (result == null){
510
               result = ref;
511
               Reference inRef = result.getInReference();
512
               if (inRef != null){
513
                   result.setInReference(getExistingReference(state, result.getInReference()));
514
               }
515
               putReference(result.getTitleCache(), result);
516
           }else{
517
               if(logger.isDebugEnabled()) {
518
                   logger.debug("Matches");
519
                }
520
           }
521
           return result;
522
       }
523
   }
524

    
525
   /**
526
    * @param state
527
    */
528
   private void initRerenceMap(STATE state) {
529
       if (!referenceMapIsInitialized && repository != null){
530
           List<String> propertyPaths = Arrays.asList("");
531
           List<Reference> existingReferences = repository.getReferenceService().list(null, null, null, null, propertyPaths);
532
           for (Reference ref : existingReferences){
533
               putReference(ref.getTitleCache(), ref);
534
           }
535
           referenceMapIsInitialized = true;
536
       }
537
   }
538

    
539
   /**
540
    * @param state
541
    * @param name
542
    */
543
   public <NAME extends INonViralName> NAME getExistingName(STATE state, NAME name) {
544
       if (name == null){
545
           return null;
546
       }else{
547
           initNameMap(state);
548
           @SuppressWarnings("unchecked")
549
           NAME result = (NAME)getMatchingName(name).orElse(null);
550
           if (result == null){
551
               result = name;
552
               Set<HybridRelationship> parentRelations = result.getHybridChildRelations();
553
               for (HybridRelationship rel : parentRelations){
554
                   INonViralName parent = rel.getParentName();
555
                   if (parent != null){
556
                       rel.setParentName(getExistingName(state, parent));
557
                   }
558
               }
559
               putName(result.getTitleCache(), result);
560
           }else{
561
               if(logger.isDebugEnabled()) {
562
                   logger.debug("Matches");
563
                }
564
           }
565
           return result;
566
       }
567
   }
568

    
569
   /**
570
    * @param state
571
    */
572
   private void initNameMap(STATE state) {
573
       if (!nameMapIsInitialized && repository != null){
574
           List<String> propertyPaths = Arrays.asList("");
575
           List<TaxonName> existingNames = repository.getNameService().list(null, null, null, null, propertyPaths);
576
           for (TaxonName name : existingNames){
577
               putName(name.getTitleCache(), name);
578
           }
579
          nameMapIsInitialized = true;
580
       }
581
   }
582

    
583

    
584

    
585
   public Rights getExistingCopyright(STATE state,
586
           Rights right) {
587
       if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){
588
           return null;
589
       }else{
590
           initCopyrightMap(state);
591
           String key = makeCopyrightKey(right);
592
           Set<Rights> set = copyrightMap.get(key);
593
           if (set == null || set.isEmpty()){
594
               putCopyright(key, right);
595
               return right;
596
           }else if (set.size()>1){
597
               //TODO
598
               logger.warn("More than 1 matching copyright not yet handled for key: " + key);
599
           }
600
           return set.iterator().next();
601
       }
602
   }
603

    
604
    /**
605
     * @param state
606
     */
607
    private void initCopyrightMap(STATE state) {
608
        if (!copyrightMapIsInitialized && repository != null){
609
            List<String> propertyPaths = Arrays.asList("");
610
            List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths);
611
            for (Rights right : existingRights){
612
                if (RightsType.COPYRIGHT().equals(right.getType())){
613
                    putCopyright(makeCopyrightKey(right), right);
614
                }
615
            }
616
            copyrightMapIsInitialized = true;
617
        }
618

    
619
    }
620

    
621
    /**
622
     * @param makeCopyrightKey
623
     * @param right
624
     */
625
    private void putCopyright(String key, Rights right) {
626
        Set<Rights> rights = copyrightMap.get(key);
627
        if (rights == null){
628
            rights = new HashSet<>();
629
            copyrightMap.put(key, rights);
630
        }
631
        rights.add(right);
632

    
633
    }
634

    
635
    /**
636
     * @param right
637
     * @return
638
     */
639
    private String makeCopyrightKey(Rights right) {
640
        if (right.getAgent() != null){
641
            return right.getAgent().getTitleCache();
642
        }else if (right.getText() != null){
643
            return right.getText();
644
        }else {
645
            logger.warn("Key for copyright could not be created: " + right);
646
            return right.getUuid().toString();
647
        }
648
    }
649

    
650

    
651

    
652
}
    (1-1/1)