deduplication
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / IdentifiableServiceBase.java
1 // $Id$
2 /**
3 * Copyright (C) 2007 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10
11 package eu.etaxonomy.cdm.api.service;
12
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.HashSet;
16 import java.util.List;
17 import java.util.Set;
18
19 import org.apache.log4j.Logger;
20 import org.hibernate.criterion.Criterion;
21 import org.springframework.beans.factory.annotation.Autowired;
22 import org.springframework.transaction.annotation.Transactional;
23
24 import eu.etaxonomy.cdm.api.service.config.IIdentifiableEntityServiceConfigurator;
25 import eu.etaxonomy.cdm.api.service.pager.Pager;
26 import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28 import eu.etaxonomy.cdm.model.common.ISourceable;
29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
30 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
31 import eu.etaxonomy.cdm.model.common.LSID;
32 import eu.etaxonomy.cdm.model.common.UuidAndTitleCache;
33 import eu.etaxonomy.cdm.model.media.Rights;
34 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
35 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
36 import eu.etaxonomy.cdm.persistence.dao.common.IIdentifiableDao;
37 import eu.etaxonomy.cdm.persistence.query.MatchMode;
38 import eu.etaxonomy.cdm.persistence.query.OrderHint;
39 import eu.etaxonomy.cdm.persistence.query.OrderHint.SortOrder;
40 import eu.etaxonomy.cdm.strategy.cache.common.IIdentifiableEntityCacheStrategy;
41 import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
42 import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
43 import eu.etaxonomy.cdm.strategy.match.IMatchable;
44 import eu.etaxonomy.cdm.strategy.match.MatchException;
45 import eu.etaxonomy.cdm.strategy.merge.IMergable;
46 import eu.etaxonomy.cdm.strategy.merge.IMergeStrategy;
47 import eu.etaxonomy.cdm.strategy.merge.MergeException;
48
49 public abstract class IdentifiableServiceBase<T extends IdentifiableEntity,DAO extends IIdentifiableDao<T>> extends AnnotatableServiceBase<T,DAO>
50 implements IIdentifiableEntityService<T>{
51
52 @Autowired
53 protected ICommonService commonService;
54
55
56 protected static final int UPDATE_TITLE_CACHE_DEFAULT_STEP_SIZE = 1000;
57 protected static final Logger logger = Logger.getLogger(IdentifiableServiceBase.class);
58
59 @Transactional(readOnly = true)
60 public Pager<Rights> getRights(T t, Integer pageSize, Integer pageNumber, List<String> propertyPaths) {
61 Integer numberOfResults = dao.countRights(t);
62
63 List<Rights> results = new ArrayList<Rights>();
64 if(numberOfResults > 0) { // no point checking again
65 results = dao.getRights(t, pageSize, pageNumber,propertyPaths);
66 }
67
68 return new DefaultPagerImpl<Rights>(pageNumber, numberOfResults, pageSize, results);
69 }
70
71 @Transactional(readOnly = true)
72 public Pager<IdentifiableSource> getSources(T t, Integer pageSize, Integer pageNumber, List<String> propertyPaths) {
73 Integer numberOfResults = dao.countSources(t);
74
75 List<IdentifiableSource> results = new ArrayList<IdentifiableSource>();
76 if(numberOfResults > 0) { // no point checking again
77 results = dao.getSources(t, pageSize, pageNumber,propertyPaths);
78 }
79
80 return new DefaultPagerImpl<IdentifiableSource>(pageNumber, numberOfResults, pageSize, results);
81 }
82
83 @Transactional(readOnly = true)
84 protected List<T> findByTitle(IIdentifiableEntityServiceConfigurator config){
85 return ((IIdentifiableDao)dao).findByTitle(config.getTitleSearchString(),
86 config.getMatchMode(), 0, -1, null);
87 // TODO: Implement parameters pageSize, pageNumber, and criteria
88 }
89
90 @Transactional(readOnly = false)
91 public T replace(T x, T y) {
92 return dao.replace(x, y);
93 }
94 /**
95 * FIXME Candidate for harmonization
96 * Given that this method is strongly typed, and generic, could we not simply expose it as
97 * List<T> findByTitle(String title) as it is somewhat less cumbersome. Admittedly, I don't
98 * understand what is going on with the configurators etc. so maybe there is a good reason for
99 * the design of this method.
100 * @param title
101 * @return
102 */
103 @Transactional(readOnly = true)
104 protected List<T> findCdmObjectsByTitle(String title){
105 return ((IIdentifiableDao)dao).findByTitle(title);
106 }
107
108 @Transactional(readOnly = true)
109 protected List<T> findCdmObjectsByTitle(String title, Class<T> clazz){
110 return ((IIdentifiableDao)dao).findByTitleAndClass(title, clazz);
111 }
112 @Transactional(readOnly = true)
113 protected List<T> findCdmObjectsByTitle(String title, CdmBase sessionObject){
114 return ((IIdentifiableDao)dao).findByTitle(title, sessionObject);
115 }
116
117 /*
118 * TODO - Migrated from CommonServiceBase
119 * (non-Javadoc)
120 * @see eu.etaxonomy.cdm.api.service.ICommonService#getSourcedObjectById(java.lang.String, java.lang.String)
121 */
122 @Transactional(readOnly = true)
123 public ISourceable getSourcedObjectByIdInSource(Class clazz, String idInSource, String idNamespace) {
124 ISourceable result = null;
125
126 List<T> list = dao.findOriginalSourceByIdInSource(idInSource, idNamespace);
127 if (! list.isEmpty()){
128 result = list.get(0);
129 }
130 return result;
131 }
132
133 /* (non-Javadoc)
134 * @see eu.etaxonomy.cdm.api.service.IIdentifiableEntityService#getUuidAndTitleCache()
135 */
136 @Transactional(readOnly = true)
137 public List<UuidAndTitleCache<T>> getUuidAndTitleCache() {
138 return dao.getUuidAndTitleCache();
139 }
140
141 @Transactional(readOnly = true)
142 public Pager<T> findByTitle(Class<? extends T> clazz, String queryString,MatchMode matchmode, List<Criterion> criteria, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) {
143 Integer numberOfResults = dao.countByTitle(clazz, queryString, matchmode, criteria);
144
145 List<T> results = new ArrayList<T>();
146 if(numberOfResults > 0) { // no point checking again
147 results = dao.findByTitle(clazz, queryString, matchmode, criteria, pageSize, pageNumber, orderHints, propertyPaths);
148 }
149
150 return new DefaultPagerImpl<T>(pageNumber, numberOfResults, pageSize, results);
151 }
152
153 @Transactional(readOnly = true)
154 public List<T> listByTitle(Class<? extends T> clazz, String queryString,MatchMode matchmode, List<Criterion> criteria, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) {
155 Integer numberOfResults = dao.countByTitle(clazz, queryString, matchmode, criteria);
156
157 List<T> results = new ArrayList<T>();
158 if(numberOfResults > 0) { // no point checking again
159 results = dao.findByTitle(clazz, queryString, matchmode, criteria, pageSize, pageNumber, orderHints, propertyPaths);
160 }
161 return results;
162 }
163
164 @Transactional(readOnly = true)
165 public List<T> listByReferenceTitle(Class<? extends T> clazz, String queryString,MatchMode matchmode, List<Criterion> criteria, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) {
166 Integer numberOfResults = dao.countByReferenceTitle(clazz, queryString, matchmode, criteria);
167
168 List<T> results = new ArrayList<T>();
169 if(numberOfResults > 0) { // no point checking again
170 results = dao.findByReferenceTitle(clazz, queryString, matchmode, criteria, pageSize, pageNumber, orderHints, propertyPaths);
171 }
172 return results;
173 }
174
175 @Transactional(readOnly = true)
176 public T find(LSID lsid) {
177 return dao.find(lsid);
178 }
179
180 @Transactional(readOnly = true)
181 public Pager<T> search(Class<? extends T> clazz, String queryString, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) {
182 Integer numberOfResults = dao.count(clazz,queryString);
183
184 List<T> results = new ArrayList<T>();
185 if(numberOfResults > 0) { // no point checking again
186 results = dao.search(clazz,queryString, pageSize, pageNumber, orderHints, propertyPaths);
187 }
188
189 return new DefaultPagerImpl<T>(pageNumber, numberOfResults, pageSize, results);
190 }
191
192 @Transactional(readOnly = false)
193 public void updateTitleCache(Class<? extends T> clazz) {
194 IIdentifiableEntityCacheStrategy<T> cacheStrategy = null;
195 updateTitleCache(clazz, UPDATE_TITLE_CACHE_DEFAULT_STEP_SIZE, cacheStrategy);
196 }
197
198
199 @Transactional(readOnly = false) //TODO check transactional behaviour, e.g. what happens with the session if count is very large
200 public void updateTitleCache(Class<? extends T> clazz, Integer stepSize, IIdentifiableEntityCacheStrategy<T> cacheStrategy) {
201 if (stepSize == null){
202 stepSize = UPDATE_TITLE_CACHE_DEFAULT_STEP_SIZE;
203 }
204
205 int count = dao.count(clazz);
206 for(int i = 0 ; i < count ; i = i + stepSize){
207 // not sure if such strict ordering is necessary here, but for safety reasons I do it
208 ArrayList<OrderHint> orderHints = new ArrayList<OrderHint>();
209 orderHints.add( new OrderHint("id", OrderHint.SortOrder.ASCENDING));
210 List<T> list = this.list(clazz, stepSize, i, orderHints, null);
211 List<T> entitiesToUpdate = new ArrayList<T>();
212 for (T entity : list){
213 if (entity.isProtectedTitleCache() == false){
214 IIdentifiableEntityCacheStrategy entityCacheStrategy = cacheStrategy;
215 if (entityCacheStrategy == null){
216 entityCacheStrategy = entity.getCacheStrategy();
217 //FIXME find out why the wrong cache strategy is loaded here, see #1876
218 if (entity instanceof ReferenceBase){
219 entityCacheStrategy = ReferenceFactory.newReference(((ReferenceBase)entity).getType()).getCacheStrategy();
220 }
221 }
222 entity.setCacheStrategy(entityCacheStrategy);
223 //TODO this won't work for those classes that always generate the title cache new
224 String titleCache = entity.getTitleCache();
225 setOtherCachesNull(entity); //TODO find better solution
226 String newTitleCache = entityCacheStrategy.getTitleCache(entity);
227 if (titleCache == null || titleCache != null && ! titleCache.equals(newTitleCache)){
228 entity.setTitleCache(null, false);
229 entity.getTitleCache();
230 entitiesToUpdate.add(entity);
231 }
232 }
233 }
234 saveOrUpdate(entitiesToUpdate);
235
236 }
237 }
238
239
240
241 /**
242 * Needs override if not only the title cache should be set to null to
243 * generate the correct new title cache
244 */
245 protected void setOtherCachesNull(T entity) {
246 return;
247 }
248
249 // @Override
250 // public int deduplicate(Class<? extends IdentifiableEntity> clazz, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy);
251
252
253 @Override
254 public int deduplicate(Class<? extends T> clazz, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) {
255 // if (clazz == null){
256 // clazz = this.getCT.class;
257 // }
258 if (! ( IMatchable.class.isAssignableFrom(clazz) && IMergable.class.isAssignableFrom(clazz) ) ){
259 logger.warn("Deduplication implemented only for classes implementing IMatchable and IMergeable. No deduplication performed!");
260 return 0;
261 }
262 Class matchableClass = clazz;
263 if (matchStrategy == null){
264 matchStrategy = DefaultMatchStrategy.NewInstance(matchableClass);
265 }
266
267 int result = 0;
268 double countTotal = count(clazz);
269 Integer pageSize = 1000;
270 List<T> nextGroup = new ArrayList<T>();
271 String lastTitleCache = null;
272
273 Number countPagesN = Math.ceil(countTotal/pageSize.doubleValue()) ;
274 int countPages = countPagesN.intValue();
275
276 for (int i = 0; i< countPages ; i++){
277 List<OrderHint> orderHints = Arrays.asList(new OrderHint[]{new OrderHint("titleCache", SortOrder.ASCENDING)});
278 List<T> objectList = listByTitle(clazz, null, null, null, pageSize, i, orderHints, null);
279
280 for (T object : objectList){
281 String currentTitleCache = object.getTitleCache();
282 if (currentTitleCache != null && currentTitleCache.equals(lastTitleCache)){
283 //=titleCache
284 nextGroup.add(object);
285 }else{
286 //<> titleCache
287 result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
288 nextGroup = new ArrayList<T>();
289 nextGroup.add(object);
290 }
291 lastTitleCache = currentTitleCache;
292 }
293 }
294 result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
295 return result;
296 }
297
298 private int handleLastGroup(List<T> group, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) {
299 int result = 0;
300 int size = group.size();
301 Set<Integer> exclude = new HashSet<Integer>(); //set to collect all objects, that have been merged already
302 for (int i = 0; i < size - 1; i++){
303 if (exclude.contains(i)){
304 continue;
305 }
306 for (int j = i + 1; j < size; j++){
307 if (exclude.contains(j)){
308 continue;
309 }
310 T firstObject = group.get(i);
311 T secondObject = group.get(j);
312
313 try {
314 if (matchStrategy.invoke((IMatchable)firstObject, (IMatchable)secondObject)){
315 commonService.merge((IMergable)firstObject, (IMergable)secondObject, mergeStrategy);
316 exclude.add(j);
317 result++;
318 }
319 } catch (MatchException e) {
320 logger.warn("MatchException when trying to match " + firstObject.getTitleCache());
321 e.printStackTrace();
322 } catch (MergeException e) {
323 logger.warn("MergeException when trying to merge " + firstObject.getTitleCache());
324 e.printStackTrace();
325 }
326 }
327 }
328 return result;
329 }
330
331 }
332