Revision d5bca6b4
Added by Andreas Müller over 2 years ago
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/utils/ImportDeduplicationHelper.java | ||
---|---|---|
32 | 32 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
33 | 33 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
34 | 34 |
import eu.etaxonomy.cdm.model.common.ICdmBase; |
35 |
import eu.etaxonomy.cdm.model.common.IdentifiableEntity; |
|
35 | 36 |
import eu.etaxonomy.cdm.model.media.Rights; |
36 | 37 |
import eu.etaxonomy.cdm.model.media.RightsType; |
37 | 38 |
import eu.etaxonomy.cdm.model.name.HybridRelationship; |
... | ... | |
42 | 43 |
import eu.etaxonomy.cdm.model.reference.Reference; |
43 | 44 |
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy; |
44 | 45 |
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual; |
46 |
import eu.etaxonomy.cdm.strategy.match.IMatchable; |
|
45 | 47 |
import eu.etaxonomy.cdm.strategy.match.MatchException; |
46 | 48 |
import eu.etaxonomy.cdm.strategy.match.MatchMode; |
47 | 49 |
|
... | ... | |
49 | 51 |
* Helper class for deduplicating authors, references, names, etc. |
50 | 52 |
* during import. |
51 | 53 |
* |
54 |
* Note 2021: Was originally used as a fast deduplication tool for command-line imports |
|
55 |
* into empty databases. Currently it is being transformed into a deduplication tool that |
|
56 |
* can be used during application-based imports. |
|
57 |
* |
|
52 | 58 |
* @author a.mueller |
53 | 59 |
* @since 11.02.2017 |
54 | 60 |
*/ |
... | ... | |
58 | 64 |
|
59 | 65 |
private ICdmRepository repository; |
60 | 66 |
|
67 |
//for possible future use |
|
68 |
@SuppressWarnings("unused") |
|
61 | 69 |
private ImportStateBase<?,?> state; |
62 | 70 |
|
63 |
boolean referenceMapIsInitialized = false; |
|
64 |
boolean nameMapIsInitialized = false; |
|
65 |
boolean agentMapIsInitialized = false; |
|
66 |
boolean copyrightMapIsInitialized = false; |
|
67 |
boolean collectionMapIsInitialized = false; |
|
71 |
public static final int NEVER_USE_MAP = 0; |
|
72 |
public static final int ALWAYS_USE_MAP = -1; |
|
73 |
//Should deduplication use maps that index the full database content? If yes, this is the maximum number of records up to which such maps are loaded. |
|
74 |
//If more records exist, deduplication is done on the fly against the repository. |
|
75 |
//0 = never use map |
|
76 |
//-1 = always use map |
|
77 |
private int maxCountFullLoad = ALWAYS_USE_MAP; |
|
78 |
public int getMaxCountFullLoad() { |
|
79 |
return maxCountFullLoad; |
|
80 |
} |
|
81 |
public void setMaxCountFullLoad(int maxCountFullLoad) { |
|
82 |
this.maxCountFullLoad = maxCountFullLoad; |
|
83 |
} |
|
84 |
|
|
85 |
private enum Status{ |
|
86 |
NOT_INIT, |
|
87 |
USE_MAP, |
|
88 |
USE_REPO; |
|
89 |
} |
|
90 |
|
|
91 |
private class DedupInfo<S extends IdentifiableEntity>{ |
|
92 |
Class<S> clazz; |
|
93 |
IMatchStrategyEqual matcher; |
|
94 |
Map<String, Set<S>> map = new HashMap<>(); |
|
95 |
Status status = Status.NOT_INIT; |
|
96 |
|
|
97 |
@SuppressWarnings("unchecked") |
|
98 |
private DedupInfo(Class<S> clazz, DedupMap dedupMap){ |
|
99 |
this.clazz = clazz; |
|
100 |
if (IMatchable.class.isAssignableFrom(clazz)) { |
|
101 |
matcher = DefaultMatchStrategy.NewInstance((Class<IMatchable>)clazz); |
|
102 |
} |
|
103 |
dedupMap.put(clazz, this); |
|
104 |
} |
|
105 |
@Override |
|
106 |
public String toString() { |
|
107 |
return clazz.getSimpleName() + ":" + status.name()+":mapsize=" + map.size()+":"+ (matcher == null?"without":"with") + " matcher"; |
|
108 |
} |
|
109 |
} |
|
110 |
|
|
111 |
private class DedupMap<T extends IdentifiableEntity> extends HashMap<Class<T>, DedupInfo<T>>{ |
|
112 |
private static final long serialVersionUID = 3757206594833330646L; |
|
113 |
} |
|
114 |
private DedupMap<? extends IdentifiableEntity> dedupMap = new DedupMap<>(); |
|
68 | 115 |
|
116 |
private DedupInfo<Reference> referenceDedupInfo = new DedupInfo<>(Reference.class, dedupMap); |
|
117 |
private DedupInfo<Person> personDedupInfo = new DedupInfo<>(Person.class, dedupMap); |
|
118 |
private DedupInfo<Team> teamDedupInfo = new DedupInfo<>(Team.class, dedupMap); |
|
119 |
private DedupInfo<TaxonName> nameDedupInfo = new DedupInfo<>(TaxonName.class, dedupMap); |
|
69 | 120 |
|
70 |
private Map<String, Set<Reference>> refMap = new HashMap<>(); |
|
71 |
private Map<String, Set<Team>> teamMap = new HashMap<>(); |
|
72 |
private Map<String, Set<Person>> personMap = new HashMap<>(); |
|
73 |
private Map<String, Institution> institutionMap = new HashMap<>(); |
|
121 |
|
|
122 |
private Status institutionStatus = Status.NOT_INIT; |
|
123 |
private Status copyrightStatus = Status.NOT_INIT; |
|
124 |
private Status collectionStatus = Status.NOT_INIT; |
|
125 |
|
|
126 |
private Map<String, Set<Institution>> institutionMap = new HashMap<>(); |
|
74 | 127 |
//using titleCache |
75 |
private Map<String, Set<INonViralName>> nameMap = new HashMap<>(); |
|
76 | 128 |
private Map<String, Set<Rights>> copyrightMap = new HashMap<>(); |
77 | 129 |
private Map<String, Set<Collection>> collectionMap = new HashMap<>(); |
78 | 130 |
|
... | ... | |
80 | 132 |
* Clears all internal maps. |
81 | 133 |
*/ |
82 | 134 |
public void reset() { |
83 |
refMap.clear(); |
|
84 |
teamMap.clear(); |
|
85 |
personMap.clear(); |
|
135 |
dedupMap.values().forEach(di->di.map.clear()); |
|
86 | 136 |
institutionMap.clear(); |
87 |
nameMap.clear(); |
|
88 | 137 |
copyrightMap.clear(); |
89 | 138 |
collectionMap.clear(); |
90 | 139 |
} |
91 | 140 |
|
92 |
private IMatchStrategyEqual referenceMatcher = DefaultMatchStrategy.NewInstance(Reference.class); |
|
93 | 141 |
// private IMatchStrategy collectionMatcher = DefaultMatchStrategy.NewInstance(Collection.class); |
94 |
private IMatchStrategyEqual nameMatcher = DefaultMatchStrategy.NewInstance(TaxonName.class); |
|
95 |
private IMatchStrategyEqual personMatcher = DefaultMatchStrategy.NewInstance(Person.class); |
|
96 |
private IMatchStrategyEqual teamMatcher = DefaultMatchStrategy.NewInstance(Team.class); |
|
97 |
|
|
98 | 142 |
|
99 | 143 |
// ************************** FACTORY *******************************/ |
100 | 144 |
|
101 |
/** |
|
102 |
* @param repository |
|
103 |
* @param state |
|
104 |
* @return |
|
105 |
*/ |
|
106 | 145 |
public static <STATE extends ImportStateBase<?,?>> ImportDeduplicationHelper NewInstance(ICdmRepository repository, STATE state){ |
107 | 146 |
return new ImportDeduplicationHelper(repository, state); |
108 | 147 |
} |
... | ... | |
119 | 158 |
} |
120 | 159 |
this.state = state; |
121 | 160 |
try { |
122 |
referenceMatcher.setMatchMode("title", MatchMode.EQUAL);
|
|
123 |
teamMatcher.setMatchMode("nomenclaturalTitleCache", MatchMode.EQUAL_OR_SECOND_NULL);
|
|
161 |
dedupMap.get(Reference.class).matcher.setMatchMode("title", MatchMode.EQUAL);
|
|
162 |
dedupMap.get(Team.class).matcher.setMatchMode("nomenclaturalTitleCache", MatchMode.EQUAL_OR_SECOND_NULL);
|
|
124 | 163 |
} catch (MatchException e) { |
125 | 164 |
throw new RuntimeException(e); //should not happen |
126 | 165 |
} |
... | ... | |
130 | 169 |
restartSession(repository, null); |
131 | 170 |
} |
132 | 171 |
|
172 |
/** |
|
173 |
* Clears all internal maps and reloads them with the same data as before, but in the current session. |
|
174 |
*/ |
|
133 | 175 |
public void restartSession(ICdmRepository repository, ImportResult importResult){ |
134 | 176 |
if (repository == null){ |
135 | 177 |
return; |
136 | 178 |
} |
137 |
refMap = refreshSetMap(refMap, (IService)repository.getReferenceService(), importResult);
|
|
138 |
personMap = refreshSetMap(personMap, (IService)repository.getAgentService(), importResult);
|
|
139 |
teamMap = refreshSetMap(teamMap, (IService)repository.getAgentService(), importResult);
|
|
140 |
institutionMap = refreshMap(institutionMap, (IService)repository.getAgentService(), importResult); |
|
179 |
referenceDedupInfo.map = refreshSetMap(referenceDedupInfo.map, (IService)repository.getReferenceService(), importResult);
|
|
180 |
personDedupInfo.map = refreshSetMap(personDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
|
181 |
teamDedupInfo.map = refreshSetMap(teamDedupInfo.map, (IService)repository.getAgentService(), importResult);
|
|
182 |
institutionMap = refreshSetMap(institutionMap, (IService)repository.getAgentService(), importResult);
|
|
141 | 183 |
|
142 |
nameMap = refreshSetMap(nameMap, (IService)repository.getNameService(), importResult);
|
|
184 |
nameDedupInfo.map = refreshSetMap(nameDedupInfo.map, (IService)repository.getNameService(), importResult);
|
|
143 | 185 |
collectionMap = refreshSetMap(collectionMap, (IService)repository.getCollectionService(), importResult); |
144 | 186 |
//TODO copyright ? |
145 | 187 |
} |
146 | 188 |
|
189 |
//maybe this was used for Institution before |
|
147 | 190 |
private <T extends ICdmBase> Map<String, T> refreshMap(Map<String, T> oldMap, |
148 | 191 |
IService<T> service, ImportResult importResult) { |
192 |
|
|
149 | 193 |
Map<String, T> newMap = new HashMap<>(); |
150 | 194 |
for (String key : oldMap.keySet()){ |
151 | 195 |
T old = oldMap.get(key); |
... | ... | |
210 | 254 |
|
211 | 255 |
//************************ PUTTER / GETTER *****************************/ |
212 | 256 |
|
213 |
//REFERENCES
|
|
214 |
private void putReference(String title, Reference ref){
|
|
215 |
Set<Reference> refs = refMap.get(title);
|
|
216 |
if (refs == null){
|
|
217 |
refs = new HashSet<>();
|
|
218 |
refMap.put(title, refs);
|
|
257 |
//ENTITY
|
|
258 |
private <S extends IdentifiableEntity<?>> void putEntity(String title, S entity, Map<String,Set<S>> map){
|
|
259 |
Set<S> entitySet = map.get(title);
|
|
260 |
if (entitySet == null){
|
|
261 |
entitySet = new HashSet<>();
|
|
262 |
map.put(title, entitySet);
|
|
219 | 263 |
} |
220 |
refs.add(CdmBase.deproxy(ref));
|
|
264 |
entitySet.add(CdmBase.deproxy(entity));
|
|
221 | 265 |
} |
222 |
private Set<Reference> getReferences(String title){ |
|
223 |
return refMap.get(title); |
|
266 |
|
|
267 |
private <S extends IdentifiableEntity> Set<S> getEntityByTitle(String title, DedupInfo<S> dedupInfo){ |
|
268 |
return dedupInfo.map.get(title); |
|
224 | 269 |
} |
225 | 270 |
|
226 |
private Optional<Reference> getMatchingReference(Reference newReference){ |
|
227 |
Predicate<Reference> matchFilter = reference ->{ |
|
271 |
private <S extends IdentifiableEntity> Optional<S> getMatchingEntity(S entityOrig, DedupInfo<S> dedupInfo){ |
|
272 |
S entity = CdmBase.deproxy(entityOrig); |
|
273 |
Predicate<S> matchFilter = reference ->{ |
|
228 | 274 |
try { |
229 |
return referenceMatcher.invoke(reference, newReference).isSuccessful();
|
|
275 |
return dedupInfo.matcher.invoke((IMatchable)reference, (IMatchable)entity).isSuccessful();
|
|
230 | 276 |
} catch (MatchException e) { |
231 | 277 |
throw new RuntimeException(e); |
232 | 278 |
} |
233 | 279 |
}; |
234 |
return Optional.ofNullable(getReferences(newReference.getTitleCache()))
|
|
280 |
Optional<S> result = Optional.ofNullable(getEntityByTitle(entity.getTitleCache(), dedupInfo))
|
|
235 | 281 |
.orElse(new HashSet<>()) |
236 | 282 |
.stream() |
237 | 283 |
.filter(matchFilter) |
238 | 284 |
.findAny(); |
285 |
if (result.isPresent() || dedupInfo.status == Status.USE_MAP || repository == null){ |
|
286 |
return result; |
|
287 |
}else { |
|
288 |
try { |
|
289 |
return (Optional<S>)repository.getCommonService().findMatching((IMatchable)entity, dedupInfo.matcher).stream().findFirst(); |
|
290 |
} catch (MatchException e) { |
|
291 |
throw new RuntimeException(e); |
|
292 |
} |
|
293 |
} |
|
239 | 294 |
} |
240 | 295 |
|
241 | 296 |
// AGENTS |
242 | 297 |
private void putAgentBase(String title, AgentBase<?> agent){ |
243 | 298 |
if (agent.isInstanceOf(Person.class) ){ |
244 |
putAgent(title, CdmBase.deproxy(agent, Person.class), personMap);
|
|
299 |
putEntity(title, CdmBase.deproxy(agent, Person.class), personDedupInfo.map);
|
|
245 | 300 |
}else if (agent.isInstanceOf(Team.class)){ |
246 |
putAgent(title, CdmBase.deproxy(agent, Team.class), teamMap);
|
|
301 |
putEntity(title, CdmBase.deproxy(agent, Team.class), teamDedupInfo.map);
|
|
247 | 302 |
}else{ |
248 |
// putAgent(title, CdmBase.deproxy(agent, Institution.class), institutionMap); |
|
249 |
institutionMap.put(title, CdmBase.deproxy(agent, Institution.class)); |
|
250 |
} |
|
251 |
} |
|
252 |
//put agent |
|
253 |
private <T extends AgentBase> void putAgent(String title, T agent, Map<String, Set<T>> map){ |
|
254 |
Set<T> items = map.get(title); |
|
255 |
if (items == null){ |
|
256 |
items = new HashSet<>(); |
|
257 |
map.put(title, items); |
|
303 |
putEntity(title, CdmBase.deproxy(agent, Institution.class), institutionMap); |
|
258 | 304 |
} |
259 |
items.add(CdmBase.deproxy(agent)); |
|
260 |
} |
|
261 |
|
|
262 |
private Optional<Person> getMatchingPerson(Person newPerson){ |
|
263 |
Person newPersonDeproxy = CdmBase.deproxy(newPerson); |
|
264 |
Predicate<Person> matchFilter = (person) ->{ |
|
265 |
try { |
|
266 |
return personMatcher.invoke(person, newPersonDeproxy).isSuccessful(); |
|
267 |
} catch (MatchException e) { |
|
268 |
throw new RuntimeException(e); |
|
269 |
} |
|
270 |
}; |
|
271 |
|
|
272 |
return Optional.ofNullable(getPersons(newPerson.getTitleCache())) |
|
273 |
.orElse(new HashSet<>()) |
|
274 |
.stream() |
|
275 |
.filter(matchFilter) |
|
276 |
.findAny(); |
|
277 | 305 |
} |
278 | 306 |
|
279 | 307 |
private <T extends TeamOrPersonBase<?>> T getTeamOrPerson(T agent){ |
280 | 308 |
T result = agent; |
281 | 309 |
if (agent.isInstanceOf(Person.class)){ |
282 |
result = (T)getMatchingPerson(CdmBase.deproxy(agent, Person.class)).orElse(null) ; // personMap.get(title);
|
|
310 |
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Person.class), personDedupInfo).orElse(null) ; // personMap.get(title);
|
|
283 | 311 |
}else if (agent.isInstanceOf(Team.class)) { |
284 |
result = (T)getMatchingTeam(CdmBase.deproxy(agent, Team.class)).orElse(null); // teamMap.get(title);
|
|
312 |
result = (T)getMatchingEntity(CdmBase.deproxy(agent, Team.class), teamDedupInfo).orElse(null); // teamMap.get(title);
|
|
285 | 313 |
} |
286 | 314 |
return result; |
287 | 315 |
} |
288 | 316 |
|
289 |
private Optional<Team> getMatchingTeam(Team newTeam){ |
|
290 |
Team newTeamDeproxy = CdmBase.deproxy(newTeam); |
|
291 |
Predicate<Team> matchFilter = (team) ->{ |
|
292 |
try { |
|
293 |
return teamMatcher.invoke(team, newTeamDeproxy).isSuccessful(); |
|
294 |
} catch (MatchException e) { |
|
295 |
throw new RuntimeException(e); |
|
296 |
} |
|
297 |
}; |
|
298 |
//TODO better adapt matching strategy |
|
299 |
// newTeam.getNomenclaturalTitle(); |
|
300 |
return Optional.ofNullable(getTeams(newTeam.getTitleCache())) |
|
301 |
.orElse(new HashSet<>()) |
|
302 |
.stream() |
|
303 |
.filter(matchFilter) |
|
304 |
.findAny(); |
|
305 |
} |
|
306 |
private Set<Person> getPersons(String title){ |
|
307 |
return personMap.get(title); |
|
308 |
} |
|
309 |
private Set<Team> getTeams(String title){ |
|
310 |
return teamMap.get(title); |
|
311 |
} |
|
312 |
|
|
313 |
//NAMES |
|
314 |
private void putName(String title, INonViralName name){ |
|
315 |
Set<INonViralName> names = nameMap.get(title); |
|
316 |
if (names == null){ |
|
317 |
names = new HashSet<>(); |
|
318 |
nameMap.put(title, names); |
|
319 |
} |
|
320 |
names.add(CdmBase.deproxy(name)); |
|
321 |
} |
|
322 |
private Set<INonViralName> getNames(String title){ |
|
323 |
return nameMap.get(title); |
|
324 |
} |
|
325 |
|
|
326 |
private Optional<INonViralName> getMatchingName(INonViralName existing){ |
|
327 |
Predicate<INonViralName> matchFilter = name ->{ |
|
328 |
try { |
|
329 |
return nameMatcher.invoke(name, existing).isSuccessful(); |
|
330 |
} catch (MatchException e) { |
|
331 |
throw new RuntimeException(e); |
|
332 |
} |
|
333 |
}; |
|
334 |
return Optional.ofNullable(getNames(existing.getTitleCache())) |
|
335 |
.orElse(new HashSet<>()) |
|
336 |
.stream() |
|
337 |
.filter(matchFilter) |
|
338 |
.findAny(); |
|
339 |
} |
|
340 |
|
|
341 | 317 |
//COLLECTIONS |
342 |
private void putCollection(String title, Collection collection){ |
|
343 |
Set<Collection> collections = collectionMap.get(title); |
|
344 |
if (collections == null){ |
|
345 |
collections = new HashSet<>(); |
|
346 |
collectionMap.put(title, collections); |
|
347 |
} |
|
348 |
collections.add(CdmBase.deproxy(collection)); |
|
349 |
} |
|
350 |
|
|
351 | 318 |
private Set<Collection> getCollections(String title){ |
352 | 319 |
return collectionMap.get(title); |
353 | 320 |
} |
... | ... | |
405 | 372 |
nomRef.setAuthorship(getExistingAuthor(refAuthor)); |
406 | 373 |
|
407 | 374 |
Reference existingRef = getExistingReference((Reference)nomRef); |
375 |
//TODO AM: why do we need to check null here (we don't do this for authors, maybe because it is an original source?) |
|
408 | 376 |
if (existingRef != null){ |
409 | 377 |
name.setNomenclaturalReference(existingRef); |
410 | 378 |
} |
411 | 379 |
} |
412 | 380 |
} |
413 | 381 |
|
382 |
public void replaceReferenceRelatedData(Reference ref) { |
|
383 |
|
|
384 |
TeamOrPersonBase<?> author = ref.getAuthorship(); |
|
385 |
ref.setAuthorship(getExistingAuthor(author)); |
|
386 |
|
|
387 |
ref.setInReference(getExistingReference(ref.getInReference())); |
|
388 |
} |
|
389 |
|
|
414 | 390 |
public <T extends TeamOrPersonBase<?>> T getExistingAuthor(T author) { |
415 | 391 |
if (author == null){ |
416 | 392 |
return null; |
417 | 393 |
}else{ |
418 |
initAgentMap(); |
|
394 |
//TODO |
|
395 |
init(personDedupInfo); |
|
396 |
init(teamDedupInfo); |
|
419 | 397 |
initAuthorTitleCaches(author); |
420 | 398 |
T result = getTeamOrPerson(author); |
421 | 399 |
if (result == null){ |
... | ... | |
460 | 438 |
} else if (agent.isInstanceOf(TeamOrPersonBase.class)){ |
461 | 439 |
return getExistingAuthor(CdmBase.deproxy(agent, TeamOrPersonBase.class)); |
462 | 440 |
}else{ |
463 |
initAgentMap();
|
|
464 |
Institution result = institutionMap.get(agent.getTitleCache());
|
|
465 |
if (result == null){
|
|
466 |
putAgentBase(agent.getTitleCache(), agent);
|
|
467 |
result = CdmBase.deproxy(agent, Institution.class);
|
|
468 |
} |
|
469 |
return result; |
|
441 |
throw new RuntimeException("Institution matching not yet implemented");
|
|
442 |
// initInstitutionMap();
|
|
443 |
// Set<Institution> result = institutionMap.get(agent.getTitleCache());
|
|
444 |
// if (result == null){
|
|
445 |
// result = putEntity(agent.getTitleCache(), CdmBase.deproxy(agent, Institution.class), institutionMap);
|
|
446 |
// }
|
|
447 |
// return result;
|
|
470 | 448 |
} |
471 | 449 |
} |
472 | 450 |
|
473 |
@SuppressWarnings("rawtypes") |
|
474 |
private void initAgentMap() { |
|
475 |
if (!agentMapIsInitialized && repository != null){ |
|
476 |
List<String> propertyPaths = Arrays.asList(""); |
|
477 |
List<AgentBase> existingAgents = repository.getAgentService().list(null, null, null, null, propertyPaths); |
|
478 |
for (AgentBase agent : existingAgents){ |
|
479 |
putAgentBase(agent.getTitleCache(), CdmBase.deproxy(agent)); |
|
451 |
private <S extends IdentifiableEntity<?>> void init(DedupInfo<S> dedupInfo) { |
|
452 |
dedupInfo.status = init(dedupInfo.clazz, dedupInfo.status, dedupInfo.map); |
|
453 |
} |
|
454 |
|
|
455 |
private <S extends IdentifiableEntity<?>> Status init(Class<S> clazz, Status status, Map<String,Set<S>> map) { |
|
456 |
|
|
457 |
if (status == Status.NOT_INIT && repository != null){ |
|
458 |
if (maxCountFullLoad != NEVER_USE_MAP){ |
|
459 |
long nExisting = -2; |
|
460 |
if (maxCountFullLoad != ALWAYS_USE_MAP){ |
|
461 |
nExisting = repository.getCommonService().count(clazz); |
|
462 |
} |
|
463 |
if (nExisting <= maxCountFullLoad ){ |
|
464 |
List<String> propertyPaths = Arrays.asList(""); |
|
465 |
List<S> existingEntities = repository.getCommonService().list(clazz, null, null, null, propertyPaths); |
|
466 |
for (S ref : existingEntities){ |
|
467 |
putEntity(ref.getTitleCache(), ref, map); |
|
468 |
} |
|
469 |
return Status.USE_MAP; |
|
470 |
}else{ |
|
471 |
return Status.USE_REPO; |
|
472 |
} |
|
473 |
}else{ |
|
474 |
return Status.USE_REPO; |
|
480 | 475 |
} |
481 |
agentMapIsInitialized = true; |
|
482 | 476 |
} |
477 |
return status; |
|
483 | 478 |
} |
484 | 479 |
|
485 | 480 |
private void handleTeam(Team team) { |
486 | 481 |
List<Person> members = team.getTeamMembers(); |
487 | 482 |
for (int i =0; i< members.size(); i++){ |
488 | 483 |
Person person = CdmBase.deproxy(members.get(i)); |
489 |
Person existingPerson = getMatchingPerson(person).orElse(null);
|
|
484 |
Person existingPerson = getMatchingEntity(person, personDedupInfo).orElse(null);
|
|
490 | 485 |
if (existingPerson != null){ |
491 | 486 |
members.set(i, existingPerson); |
492 | 487 |
}else{ |
... | ... | |
503 | 498 |
Collection result = getMatchingCollections(collection).orElse(null); |
504 | 499 |
if (result == null){ |
505 | 500 |
result = collection; |
506 |
putCollection(result.getTitleCache(), result);
|
|
501 |
putEntity(result.getTitleCache(), result, collectionMap);
|
|
507 | 502 |
}else{ |
508 | 503 |
if(logger.isDebugEnabled()) { |
509 | 504 |
logger.debug("Matches"); |
... | ... | |
514 | 509 |
} |
515 | 510 |
|
516 | 511 |
private void initCollectionMap() { |
517 |
if (!collectionMapIsInitialized && repository != null){ |
|
518 |
List<String> propertyPaths = Arrays.asList(""); |
|
519 |
List<Collection> existingCollections = repository.getCollectionService().list(null, null, null, null, propertyPaths); |
|
520 |
for (Collection collection : existingCollections){ |
|
521 |
putCollection(collection.getTitleCache(), collection); |
|
522 |
} |
|
523 |
collectionMapIsInitialized = true; |
|
524 |
} |
|
512 |
collectionStatus = init(Collection.class, collectionStatus, collectionMap); |
|
525 | 513 |
} |
526 | 514 |
|
527 | 515 |
public Reference getExistingReference(Reference ref) { |
528 | 516 |
if (ref == null){ |
529 | 517 |
return null; |
530 | 518 |
}else{ |
531 |
initRerenceMap();
|
|
519 |
init(referenceDedupInfo);
|
|
532 | 520 |
initReferenceCaches(ref); |
533 |
Reference result = getMatchingReference(ref).orElse(null);
|
|
521 |
Reference result = getMatchingEntity(ref, referenceDedupInfo).orElse(null);
|
|
534 | 522 |
if (result == null){ |
535 | 523 |
result = ref; |
536 | 524 |
Reference inRef = result.getInReference(); |
537 | 525 |
if (inRef != null){ |
538 | 526 |
result.setInReference(getExistingReference(result.getInReference())); |
539 | 527 |
} |
540 |
putReference(result.getTitleCache(), result);
|
|
528 |
putEntity(result.getTitleCache(), result, referenceDedupInfo.map);
|
|
541 | 529 |
}else{ |
542 |
if(logger.isDebugEnabled()) { |
|
543 |
logger.debug("Matches"); |
|
544 |
} |
|
530 |
if(logger.isDebugEnabled()) {logger.debug("Matches");} |
|
545 | 531 |
} |
546 | 532 |
return result; |
547 | 533 |
} |
548 | 534 |
} |
549 | 535 |
|
550 |
private void initRerenceMap() { |
|
551 |
if (!referenceMapIsInitialized && repository != null){ |
|
552 |
List<String> propertyPaths = Arrays.asList(""); |
|
553 |
List<Reference> existingReferences = repository.getReferenceService().list(null, null, null, null, propertyPaths); |
|
554 |
for (Reference ref : existingReferences){ |
|
555 |
putReference(ref.getTitleCache(), ref); |
|
556 |
} |
|
557 |
referenceMapIsInitialized = true; |
|
558 |
} |
|
559 |
} |
|
560 |
|
|
561 |
public <NAME extends INonViralName> NAME getExistingName(NAME name) { |
|
536 |
public TaxonName getExistingName(TaxonName name) { |
|
562 | 537 |
if (name == null){ |
563 | 538 |
return null; |
564 | 539 |
}else{ |
565 |
initNameMap(); |
|
566 |
@SuppressWarnings("unchecked") |
|
567 |
NAME result = (NAME)getMatchingName(name).orElse(null); |
|
540 |
init(nameDedupInfo); |
|
541 |
TaxonName result = getMatchingEntity(name, nameDedupInfo).orElse(null); |
|
568 | 542 |
if (result == null){ |
569 | 543 |
result = name; |
570 | 544 |
Set<HybridRelationship> parentRelations = result.getHybridChildRelations(); |
571 | 545 |
for (HybridRelationship rel : parentRelations){ |
572 |
INonViralName parent = rel.getParentName();
|
|
546 |
TaxonName parent = rel.getParentName();
|
|
573 | 547 |
if (parent != null){ |
574 | 548 |
rel.setParentName(getExistingName(parent)); |
575 | 549 |
} |
576 | 550 |
} |
577 |
putName(result.getTitleCache(), result);
|
|
551 |
putEntity(result.getTitleCache(), result, nameDedupInfo.map);
|
|
578 | 552 |
}else{ |
579 | 553 |
if(logger.isDebugEnabled()) { |
580 | 554 |
logger.debug("Matches"); |
... | ... | |
584 | 558 |
} |
585 | 559 |
} |
586 | 560 |
|
587 |
private void initNameMap() { |
|
588 |
if (!nameMapIsInitialized && repository != null){ |
|
589 |
List<String> propertyPaths = Arrays.asList(""); |
|
590 |
List<TaxonName> existingNames = repository.getNameService().list(null, null, null, null, propertyPaths); |
|
591 |
for (TaxonName name : existingNames){ |
|
592 |
putName(name.getTitleCache(), name); |
|
593 |
} |
|
594 |
nameMapIsInitialized = true; |
|
595 |
} |
|
596 |
} |
|
597 |
|
|
598 | 561 |
public Rights getExistingCopyright(Rights right) { |
599 | 562 |
if (right == null || !RightsType.COPYRIGHT().equals(right.getType())){ |
600 | 563 |
return null; |
... | ... | |
614 | 577 |
} |
615 | 578 |
|
616 | 579 |
private void initCopyrightMap() { |
617 |
if (!copyrightMapIsInitialized && repository != null){
|
|
580 |
if (copyrightStatus == Status.NOT_INIT && repository != null){
|
|
618 | 581 |
List<String> propertyPaths = Arrays.asList(""); |
619 | 582 |
List<Rights> existingRights = repository.getRightsService().list(null, null, null, null, propertyPaths); |
620 | 583 |
for (Rights right : existingRights){ |
... | ... | |
622 | 585 |
putCopyright(makeCopyrightKey(right), right); |
623 | 586 |
} |
624 | 587 |
} |
625 |
copyrightMapIsInitialized = true;
|
|
588 |
copyrightStatus = Status.USE_MAP;
|
|
626 | 589 |
} |
627 | 590 |
} |
628 | 591 |
|
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceImport.java | ||
---|---|---|
24 | 24 |
import eu.etaxonomy.cdm.common.DOI; |
25 | 25 |
import eu.etaxonomy.cdm.common.URI; |
26 | 26 |
import eu.etaxonomy.cdm.io.common.CdmImportBase; |
27 |
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper; |
|
27 | 28 |
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue; |
28 | 29 |
import eu.etaxonomy.cdm.model.agent.Person; |
29 | 30 |
import eu.etaxonomy.cdm.model.agent.Team; |
... | ... | |
69 | 70 |
try { |
70 | 71 |
location = recordLocation(state, next); |
71 | 72 |
ref = handleSingleReference(state, next); |
72 |
referencesToSave.add(ref); |
|
73 |
if (ref.getInReference() != null){ |
|
74 |
referencesToSave.add(ref.getInReference()); |
|
73 |
|
|
74 |
Reference existingRef = state.getDeduplicationHelper().getExistingReference(ref); |
|
75 |
if (existingRef == ref){ //reference does not yet exist so the identical reference has been returned |
|
76 |
state.getDeduplicationHelper().replaceReferenceRelatedData(ref); |
|
77 |
referencesToSave.add(ref); |
|
78 |
if (ref.getInReference() != null){ |
|
79 |
referencesToSave.add(ref.getInReference()); |
|
80 |
} |
|
81 |
}else{ |
|
82 |
//merge ? |
|
75 | 83 |
} |
76 | 84 |
} catch (Exception e) { |
77 | 85 |
String message = "Unexpected exception during RIS Reference Import"; |
... | ... | |
510 | 518 |
return cdmType; |
511 | 519 |
} |
512 | 520 |
|
521 |
@Override |
|
522 |
public ImportDeduplicationHelper createDeduplicationHelper(RisReferenceImportState state){ |
|
523 |
ImportDeduplicationHelper result = super.createDeduplicationHelper(state); |
|
524 |
result.setMaxCountFullLoad(state.getConfig().getDeduplicationMaxCountForFullLoad()); |
|
525 |
return result; |
|
526 |
} |
|
527 |
|
|
513 | 528 |
@Override |
514 | 529 |
protected boolean doCheck(RisReferenceImportState state) { |
515 | 530 |
return true; |
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceImportConfigurator.java | ||
---|---|---|
32 | 32 |
private static final long serialVersionUID = -5982826645441621962L; |
33 | 33 |
// private static IInputTransformer defaultTransformer = null; |
34 | 34 |
|
35 |
private int deduplicationMaxCountForFullLoad = 200; |
|
36 |
|
|
37 |
//********************************** FACTORY ***********************************/ |
|
38 |
|
|
35 | 39 |
public static RisReferenceImportConfigurator NewInstance(URI uri, ICdmDataSource cdm) { |
36 | 40 |
RisReferenceImportConfigurator result = new RisReferenceImportConfigurator(uri, cdm); |
37 | 41 |
return result; |
... | ... | |
41 | 45 |
InputStream stream = url.openStream(); |
42 | 46 |
InputStreamReader reader = new InputStreamReader(stream, "UTF8"); |
43 | 47 |
|
44 |
RisReferenceImportConfigurator result = new RisReferenceImportConfigurator(); |
|
48 |
RisReferenceImportConfigurator result = new RisReferenceImportConfigurator(null, cdm);
|
|
45 | 49 |
result.setStream(IOUtils.toByteArray(reader, Charset.defaultCharset())); |
46 | 50 |
return result; |
47 | 51 |
} |
... | ... | |
52 | 56 |
return result; |
53 | 57 |
} |
54 | 58 |
|
55 |
protected RisReferenceImportConfigurator() { |
|
56 |
super(null,null); |
|
57 |
} |
|
59 |
//************************ CONSTRUCTOR ****************************************/ |
|
58 | 60 |
|
59 |
protected RisReferenceImportConfigurator(URI uri, ICdmDataSource cdm) {
|
|
61 |
private RisReferenceImportConfigurator(URI uri, ICdmDataSource cdm) {
|
|
60 | 62 |
super(uri, cdm, null); |
61 | 63 |
} |
62 | 64 |
|
65 |
// ************************* GETTER / SETTER *************************************/ |
|
66 |
|
|
67 |
public int getDeduplicationMaxCountForFullLoad() { |
|
68 |
return deduplicationMaxCountForFullLoad; |
|
69 |
} |
|
70 |
public void setDeduplicationMaxCountForFullLoad(int deduplicationMaxCountForFullLoad) { |
|
71 |
this.deduplicationMaxCountForFullLoad = deduplicationMaxCountForFullLoad; |
|
72 |
} |
|
73 |
|
|
74 |
//********************** METHODS ********************************************/ |
|
75 |
|
|
76 |
@SuppressWarnings("unchecked") |
|
63 | 77 |
@Override |
64 | 78 |
public RisReferenceImportState getNewState() { |
65 | 79 |
return new RisReferenceImportState(this); |
... | ... | |
91 | 105 |
return true; |
92 | 106 |
} |
93 | 107 |
|
94 |
|
|
95 |
} |
|
108 |
} |
cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/referenceris/in/RisReferenceImportTest.java | ||
---|---|---|
13 | 13 |
import java.io.FileNotFoundException; |
14 | 14 |
import java.io.IOException; |
15 | 15 |
import java.net.URL; |
16 |
import java.util.Collections; |
|
16 | 17 |
import java.util.List; |
17 | 18 |
|
18 | 19 |
import org.junit.Assert; |
... | ... | |
22 | 23 |
import org.unitils.spring.annotation.SpringBeanByName; |
23 | 24 |
import org.unitils.spring.annotation.SpringBeanByType; |
24 | 25 |
|
26 |
import eu.etaxonomy.cdm.api.service.IAgentService; |
|
25 | 27 |
import eu.etaxonomy.cdm.api.service.IReferenceService; |
26 | 28 |
//import eu.etaxonomy.cdm.common.DOI; |
27 | 29 |
import eu.etaxonomy.cdm.io.common.CdmApplicationAwareDefaultImport; |
28 | 30 |
import eu.etaxonomy.cdm.io.common.ImportResult; |
29 | 31 |
import eu.etaxonomy.cdm.io.reference.ris.in.RisReferenceImportConfigurator; |
32 |
import eu.etaxonomy.cdm.model.agent.Institution; |
|
30 | 33 |
import eu.etaxonomy.cdm.model.agent.Person; |
31 | 34 |
import eu.etaxonomy.cdm.model.agent.Team; |
32 | 35 |
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase; |
33 | 36 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
37 |
import eu.etaxonomy.cdm.model.common.IdentifiableEntity; |
|
34 | 38 |
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod; |
35 | 39 |
import eu.etaxonomy.cdm.model.reference.Reference; |
36 | 40 |
import eu.etaxonomy.cdm.model.reference.ReferenceType; |
... | ... | |
48 | 52 |
@SpringBeanByType |
49 | 53 |
private IReferenceService referenceService; |
50 | 54 |
|
51 |
private RisReferenceImportConfigurator configurator;
|
|
52 |
private RisReferenceImportConfigurator configLong;
|
|
55 |
@SpringBeanByType
|
|
56 |
private IAgentService agentService;
|
|
53 | 57 |
|
54 | 58 |
@Before |
55 |
public void setUp() { |
|
56 |
String inputFile = "/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceImportTest-input.ris"; |
|
57 |
|
|
58 |
try { |
|
59 |
URL url = this.getClass().getResource(inputFile); |
|
60 |
assertNotNull("URL for the test file '" + inputFile + "' does not exist", url); |
|
61 |
|
|
62 |
String inputFileLong = "/eu/etaxonomy/cdm/io/reference/ris/in/Acantholimon.ris"; |
|
63 |
URL urlLong = this.getClass().getResource(inputFileLong); |
|
64 |
assertNotNull("URL for the test file '" + inputFileLong + "' does not exist", urlLong); |
|
65 |
|
|
66 |
configurator = RisReferenceImportConfigurator.NewInstance(url, null); |
|
67 |
configLong = RisReferenceImportConfigurator.NewInstance(urlLong, null); |
|
68 |
|
|
69 |
} catch (Exception e) { |
|
70 |
e.printStackTrace(); |
|
71 |
Assert.fail(); |
|
72 |
} |
|
73 |
assertNotNull("Configurator could not be created", configurator); |
|
74 |
assertNotNull("Configurator could not be created", configLong); |
|
75 |
assertNotNull("nameService should not be null", referenceService); |
|
76 |
} |
|
59 |
public void setUp() {} |
|
77 | 60 |
|
78 | 61 |
//***************************** TESTS *************************************// |
79 | 62 |
|
... | ... | |
81 | 64 |
@DataSet( value="/eu/etaxonomy/cdm/database/ClearDBDataSet.xml", loadStrategy=CleanSweepInsertLoadStrategy.class) |
82 | 65 |
//@Ignore |
83 | 66 |
public void testShort() { |
84 |
|
|
67 |
RisReferenceImportConfigurator configurator = getConfigurator("RisReferenceImportTest-input.ris"); |
|
85 | 68 |
ImportResult result = defaultImport.invoke(configurator); |
86 | 69 |
String report = result.createReport().toString(); |
87 | 70 |
Assert.assertTrue(report.length() > 0); |
... | ... | |
142 | 125 |
} |
143 | 126 |
|
144 | 127 |
@Test |
145 |
public void testChapter() throws IOException{ |
|
146 |
String inputFileLong = "/eu/etaxonomy/cdm/io/reference/ris/in/Arias2012.ris"; |
|
147 |
URL urlLong = this.getClass().getResource(inputFileLong); |
|
148 |
configurator = RisReferenceImportConfigurator.NewInstance(urlLong, null); |
|
128 |
public void testChapter() { |
|
129 |
|
|
130 |
final RisReferenceImportConfigurator configurator = getConfigurator("Arias2012.ris"); |
|
149 | 131 |
|
150 | 132 |
ImportResult result = defaultImport.invoke(configurator); |
151 | 133 |
String report = result.createReport().toString(); |
... | ... | |
155 | 137 |
Integer expected = 2; |
156 | 138 |
Assert.assertEquals(expected, result.getNewRecords(Reference.class)); |
157 | 139 |
|
158 |
List<Reference> list = referenceService.list(Reference.class, null, null, null, null);
|
|
140 |
List<Reference> referenceList = referenceService.list(Reference.class, null, null, null, null);
|
|
159 | 141 |
Assert.assertEquals("There should be 3 references, the book-section, the book and the source reference", |
160 |
3, list.size());
|
|
142 |
3, referenceList.size());
|
|
161 | 143 |
|
162 | 144 |
//book section |
163 |
Reference bookSection = list.stream().filter(r->r.getType() == ReferenceType.BookSection).findFirst().get();
|
|
145 |
Reference bookSection = referenceList.stream().filter(r->r.getType() == ReferenceType.BookSection).findFirst().get();
|
|
164 | 146 |
//... title |
165 | 147 |
Assert.assertEquals("Cactaceae", bookSection.getTitle()); |
166 | 148 |
//... author |
... | ... | |
182 | 164 |
Assert.assertEquals("1-235", bookSection.getPages()); |
183 | 165 |
|
184 | 166 |
//book |
185 |
Reference book = list.stream().filter(r->r.getType() == ReferenceType.Book).findFirst().get();
|
|
167 |
Reference book = referenceList.stream().filter(r->r.getType() == ReferenceType.Book).findFirst().get();
|
|
186 | 168 |
//... title |
187 | 169 |
Assert.assertEquals("Flora del Valle de Tehuac\u00E1n-Cuicatl\u00E1n", book.getTitle()); |
188 | 170 |
Assert.assertEquals("Fasc\u00EDculo 95", book.getVolume()); |
... | ... | |
190 | 172 |
Assert.assertEquals("Instituto de Biolog\u00EDa, Universidad Nacional Aut\u00F3noma de M\u00E9xico", book.getPublisher()); |
191 | 173 |
|
192 | 174 |
//source reference |
193 |
Reference sourceRef = list.stream().filter(r->r.equals(configurator.getSourceReference())).findFirst().get();
|
|
175 |
Reference sourceRef = referenceList.stream().filter(r->r.equals(configurator.getSourceReference())).findFirst().get();
|
|
194 | 176 |
Assert.assertNotNull(sourceRef); |
195 | 177 |
//TODO cont. |
178 |
|
|
179 |
List<Person> personList = agentService.list(Person.class, null, null, null, null); |
|
180 |
Assert.assertEquals("There should be 5 persons", 5, personList.size()); |
|
181 |
|
|
182 |
List<Team> teamList = agentService.list(Team.class, null, null, null, null); |
|
183 |
Assert.assertEquals("There should be 1 team", 1, teamList.size()); |
|
184 |
|
|
185 |
|
|
186 |
//test deduplication by running it again |
|
187 |
result = defaultImport.invoke(configurator); |
|
188 |
report = result.createReport().toString(); |
|
189 |
Assert.assertTrue(report.contains("Reference: 0")); |
|
190 |
Assert.assertEquals(0, result.getErrors().size() + result.getExceptions().size() + result.getWarnings().size()); |
|
191 |
referenceList = referenceService.list(Reference.class, null, null, null, null); |
|
192 |
Assert.assertEquals("There should still be 3 references, the book-section, the book and the source reference", |
|
193 |
3, referenceList.size()); |
|
194 |
|
|
195 |
personList = agentService.list(Person.class, null, null, null, null); |
|
196 |
Assert.assertEquals("There should still be 5 persons", 5, personList.size()); |
|
197 |
|
|
198 |
teamList = agentService.list(Team.class, null, null, null, null); |
|
199 |
Assert.assertEquals("There should still be 1 team", 1, teamList.size()); |
|
200 |
|
|
201 |
//test deduplication by running another chapter |
|
202 |
RisReferenceImportConfigurator configurator2 = getConfigurator("Arias2012_2.ris"); |
|
203 |
result = defaultImport.invoke(configurator2); |
|
204 |
report = result.createReport().toString(); |
|
205 |
// Assert.assertTrue(report.contains("Reference: 0")); |
|
206 |
Assert.assertEquals(0, result.getErrors().size() + result.getExceptions().size() + result.getWarnings().size()); |
|
207 |
referenceList = referenceService.list(Reference.class, null, null, null, null); |
|
208 |
Assert.assertEquals("There should be 5 references, 2 book-sections, the book and 2 source references", |
|
209 |
5, referenceList.size()); |
|
210 |
|
|
211 |
personList = agentService.list(Person.class, null, null, null, null); |
|
212 |
Assert.assertEquals("There should be 6 persons now", 6, personList.size()); |
|
213 |
|
|
214 |
teamList = agentService.list(Team.class, null, null, null, null); |
|
215 |
Assert.assertEquals("There should be 2 teams now", 2, teamList.size()); |
|
216 |
|
|
196 | 217 |
} |
197 | 218 |
|
198 |
@Test |
|
199 |
//@Ignore |
|
200 |
public void testLongFile() { |
|
201 |
ImportResult result = defaultImport.invoke(configLong); |
|
202 |
String report = result.createReport().toString(); |
|
203 |
System.out.println(report); |
|
219 |
private RisReferenceImportConfigurator getConfigurator(String fileName) { |
|
220 |
String inputFile = "/eu/etaxonomy/cdm/io/reference/ris/in/" + fileName; |
|
221 |
URL url = this.getClass().getResource(inputFile); |
|
222 |
assertNotNull("URL for the test file '" + inputFile + "' does not exist", url); |
|
223 |
try { |
|
224 |
RisReferenceImportConfigurator result = RisReferenceImportConfigurator.NewInstance(url, null); |
|
225 |
result.setDeduplicationMaxCountForFullLoad(1); |
|
226 |
return result; |
|
227 |
} catch (IOException e) { |
|
228 |
Assert.fail("IOException while creating configurator: " + e.getMessage()); |
|
229 |
return null; |
|
230 |
} |
|
231 |
} |
|
204 | 232 |
|
205 |
Integer expected = 118; //did not count yet
|
|
206 |
Assert.assertEquals(expected, result.getNewRecords(Reference.class));
|
|
233 |
@Test
|
|
234 |
public void testLongFile() {
|
|
207 | 235 |
|
208 |
List<Reference> list = referenceService.list(Reference.class, null, null, null, null); |
|
209 |
// Assert.assertEquals("There should be 119 references (still need to count them)", 119, list.size()); |
|
210 |
//TODO deduplication |
|
236 |
RisReferenceImportConfigurator configurator = getConfigurator("Acantholimon.ris"); |
|
237 |
ImportResult result = defaultImport.invoke(configurator); |
|
211 | 238 |
|
212 |
Reference ref58 = list.stream().filter(r->hasId(r, "58", false)).findFirst().get(); |
|
239 |
@SuppressWarnings("unused") |
|
240 |
String report = result.createReport().toString(); |
|
241 |
// System.out.println(report); |
|
242 |
|
|
243 |
// Integer expectedWithoutDeduplication = 118; //did not count yet |
|
244 |
Integer expectedDeduplicated = 104; //did not count yet |
|
245 |
Assert.assertEquals(expectedDeduplicated, result.getNewRecords(Reference.class)); |
|
246 |
// System.out.println("Person: "+ result.getNewRecords(Person.class)); |
|
247 |
// System.out.println("Team: "+ result.getNewRecords(Team.class)); |
|
248 |
|
|
249 |
List<Reference> refList = referenceService.list(Reference.class, null, null, null, null); |
|
250 |
// Assert.assertEquals("There should be 119 references (still need to count them)", 119, refList.size()); |
|
251 |
Collections.sort(refList, (r1,r2) -> r1.getTitleCache().compareTo(r2.getTitleCache())); |
|
252 |
printList(refList); |
|
253 |
List<Person> personList = agentService.list(Person.class, null, null, null, null); |
|
254 |
printList(personList); |
|
255 |
Assert.assertEquals(99, personList.size()); |
|
256 |
List<Team> teamList = agentService.list(Team.class, null, null, null, null); |
|
257 |
printList(teamList); |
|
258 |
Assert.assertEquals(33, teamList.size()); |
|
259 |
List<Institution> institutionList = agentService.list(Institution.class, null, null, null, null); |
|
260 |
printList(institutionList); |
|
261 |
Assert.assertEquals(0, institutionList.size()); |
|
262 |
|
|
263 |
|
|
264 |
Reference ref58 = refList.stream().filter(r->hasId(r, "58", false)).findFirst().get(); |
|
213 | 265 |
Assert.assertNotNull("", ref58); |
214 | 266 |
Assert.assertEquals((Integer)2003, ref58.getDatePublished().getStartYear()); |
215 | 267 |
|
216 |
Reference ref53 = list.stream().filter(r->hasId(r, "53", false)).findFirst().get();
|
|
268 |
Reference ref53 = refList.stream().filter(r->hasId(r, "53", false)).findFirst().get();
|
|
217 | 269 |
Assert.assertNotNull("", ref53); |
218 | 270 |
Assert.assertEquals(ReferenceType.BookSection, ref53.getType()); |
219 | 271 |
Assert.assertNotNull("", ref53.getInReference()); |
... | ... | |
265 | 317 |
// } |
266 | 318 |
} |
267 | 319 |
|
320 |
private void printList(List<? extends IdentifiableEntity<?>> list) { |
|
321 |
if (!logger.isDebugEnabled()){ |
|
322 |
return; |
|
323 |
} |
|
324 |
System.out.println(list.size()); |
|
325 |
Collections.sort(list, (p1,p2) -> p1.getTitleCache().compareTo(p2.getTitleCache())); |
|
326 |
list.stream().forEach(r->System.out.println(r.getTitleCache())); |
|
327 |
} |
|
328 |
|
|
268 | 329 |
private boolean hasId(Reference ref, String idStr, boolean getInRef) { |
269 | 330 |
if (ref.getSources().size() != 1){ |
270 | 331 |
return false; |
cdmlib-io/src/test/resources/eu/etaxonomy/cdm/io/reference/ris/in/Arias2012_2.ris | ||
---|---|---|
1 |
TY - CHAP |
|
2 |
A2 - Medina, L. R. |
|
3 |
AU - Arias, S. |
|
4 |
AU - Gama-Cruz, A. |
|
5 |
CY - México D. F. |
|
6 |
PB - Instituto de Biología, Universidad Nacional Autónoma de México |
|
7 |
PY - 2012 |
|
8 |
SP - 236-247 |
|
9 |
ST - Cactaceae2 |
|
10 |
T2 - Flora del Valle de Tehuacán-Cuicatlán |
|
11 |
TI - Cactaceae2 |
|
12 |
VL - Fascículo 95 |
|
13 |
ID - 3825 |
|
14 |
ER - |
Also available in: Unified diff
ref #9889 add direct matching against persisted data to the deduplication helper and use it in place of the previous deduplication for reference.author and reference.inReference