+
+
+
+    /**
+     * Mutable bookkeeping shared by the deduplication helpers while one
+     * deduplication run walks through the title-ordered pages.
+     */
+    private class DeduplicateState{
+        // --- paging ---
+        /** Index of the first page of the next batch to be fetched. */
+        int startPage = 0;
+        /** Number of pages fetched per batch. */
+        int nPages = 3;
+        /** Number of records requested per page. */
+        Integer pageSize = 50;
+
+        // --- progress ---
+        /** Title cache of the most recently visited object, <code>null</code> before the first one. */
+        String lastTitleCache = null;
+        /** Set once a batch came back with fewer records than a full batch. */
+        boolean isCompleted = false;
+        /** Merge counter accumulated while groups are handled. */
+        int result = 0;
+    }
+
+    /**
+     * Merges duplicates among the objects of the given class.
+     * <p>
+     * The objects are read in batches of pages ordered by <code>titleCache</code>.
+     * Each batch is handed to {@link #handleAllPages}, which groups consecutive
+     * objects by title cache and merges the members of a group that match.
+     * The start page of the next batch is derived from the number of title
+     * groups found, because merged records no longer occupy a slot in the
+     * ordered result.
+     *
+     * @param clazz the class to deduplicate; it must implement both
+     *        <code>IMatchable</code> and <code>IMergable</code>, otherwise
+     *        a warning is logged and nothing is done
+     * @param matchStrategy the strategy deciding whether two objects are duplicates;
+     *        if <code>null</code> the default match strategy for <code>clazz</code> is used
+     * @param mergeStrategy the strategy handed on to the merge calls
+     * @return the number of merges counted during the run, <code>0</code> if the
+     *         class is <code>null</code> or not supported
+     */
+    @Override
+    @Transactional(readOnly = false)
+    public int deduplicate(Class<? extends T> clazz, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) {
+        if (clazz == null){
+            logger.warn("Deduplication clazz must not be null!");
+            return 0;
+        }
+        if (! ( IMatchable.class.isAssignableFrom(clazz) && IMergable.class.isAssignableFrom(clazz) ) ){
+            logger.warn("Deduplication implemented only for classes implementing IMatchable and IMergeable. No deduplication performed!");
+            return 0;
+        }
+        //raw type on purpose: clazz was checked above to be matchable, but the compiler can't know
+        Class matchableClass = clazz;
+        if (matchStrategy == null){
+            matchStrategy = DefaultMatchStrategy.NewInstance(matchableClass);
+        }
+
+        DeduplicateState dedupState = new DeduplicateState();
+        List<T> nextGroup = new ArrayList<T>();
+        List<T> previousBatch = null;
+        List<OrderHint> orderHints = Arrays.asList(new OrderHint[]{new OrderHint("titleCache", SortOrder.ASCENDING)});
+
+        while (! dedupState.isCompleted){
+            //get x page sizes
+            List<T> objectList = getPages(clazz, dedupState, orderHints);
+            //after each batch check how many title groups were seen
+            int nUnEqual = handleAllPages(objectList, dedupState, nextGroup, matchStrategy, mergeStrategy);
+            //refresh start page counter
+            int finishedPages = (nUnEqual + dedupState.pageSize * dedupState.startPage) / dedupState.pageSize;
+            if (finishedPages <= dedupState.startPage && objectList.equals(previousBatch)){
+                //The start page did not advance and the very same records came back again,
+                //so nothing was merged away in between. Re-reading these pages once more
+                //would repeat forever; every record of the batch has been examined, skip it.
+                finishedPages = dedupState.startPage + dedupState.nPages;
+            }
+            previousBatch = objectList;
+            dedupState.startPage = finishedPages;
+        }
+
+        //merges counted while the batches were handled, plus whatever is left in the open group
+        int result = dedupState.result;
+        result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
+        return result;
+    }
+
+
+    /**
+     * Walks through one batch of title-ordered objects, collects consecutive objects
+     * with the same title cache into a group and lets {@link #handleLastGroup} merge
+     * the matching members of every group, including the group still open when the
+     * batch ends. All merges are added to <code>dedupState.result</code>.
+     * <p>
+     * Objects whose title cache is <code>null</code> never join a group.
+     *
+     * @param objectList the objects of the current batch, ordered by title cache
+     * @param dedupState the run state; <code>lastTitleCache</code> and <code>result</code> are updated
+     * @param nextGroup working list for the group currently being collected; objects already
+     *        in it on entry are treated as the start of the first group. It is emptied before
+     *        returning, because its content has been handled by then and must not be
+     *        matched again by a later call
+     * @param matchStrategy the strategy deciding whether two objects are duplicates
+     * @param mergeStrategy the strategy handed on to the merge calls
+     * @return the number of objects in this batch that started a new title group
+     */
+    private int handleAllPages(List<T> objectList, DeduplicateState dedupState, List<T> nextGroup, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) {
+        int nUnEqual = 0;
+        for (T object : objectList){
+            String currentTitleCache = object.getTitleCache();
+            boolean sameTitle = currentTitleCache != null && currentTitleCache.equals(dedupState.lastTitleCache);
+            if (! sameTitle){
+                //<> titleCache: the previous group is complete, handle it and start a new one
+                dedupState.result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
+                //reset in place; replacing the reference would leave the caller's list stale
+                nextGroup.clear();
+                nUnEqual++;
+            }
+            nextGroup.add(object);
+            dedupState.lastTitleCache = currentTitleCache;
+        }
+        //the group still open at the end of the batch is handled (and counted) here
+        dedupState.result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy);
+        nextGroup.clear();
+        return nUnEqual;
+    }
+
+ private List<T> getPages(Class<? extends T> clazz, DeduplicateState dedupState, List<OrderHint> orderHints) {
+ List<T> result = new ArrayList<T>();
+ for (int pageNo = dedupState.startPage; pageNo < dedupState.startPage + dedupState.nPages; pageNo++){
+ List<T> objectList = listByTitle(clazz, null, null, null, dedupState.pageSize, pageNo, orderHints, null);
+ result.addAll(objectList);
+ }
+ if (result.size()< dedupState.nPages * dedupState.pageSize ){
+ dedupState.isCompleted = true;
+ }
+ return result;
+ }
+
+    /**
+     * Compares every pair of objects in the group and merges the second object of a
+     * pair into the first one whenever the match strategy reports a match. An object
+     * that has been merged into another one is not looked at again.
+     * <p>
+     * Match and merge failures are logged as warnings, including the exception, and
+     * the remaining pairs are still processed.
+     *
+     * @param group the candidate objects; an empty or single element group yields no merge
+     * @param matchStrategy the strategy deciding whether two objects are duplicates
+     * @param mergeStrategy the strategy handed on to the merge call
+     * @return the number of merges performed
+     */
+    private int handleLastGroup(List<T> group, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) {
+        int result = 0;
+        int size = group.size();
+        Set<Integer> exclude = new HashSet<Integer>();  //set to collect all objects, that have been merged already
+        for (int i = 0; i < size - 1; i++){
+            if (exclude.contains(i)){
+                continue;
+            }
+            T firstObject = group.get(i);
+            for (int j = i + 1; j < size; j++){
+                if (exclude.contains(j)){
+                    continue;
+                }
+                T secondObject = group.get(j);
+
+                try {
+                    if (matchStrategy.invoke((IMatchable)firstObject, (IMatchable)secondObject)){
+                        commonService.merge((IMergable)firstObject, (IMergable)secondObject, mergeStrategy);
+                        exclude.add(j);
+                        result++;
+                    }
+                } catch (MatchException e) {
+                    //hand the exception to the logger so the stack trace ends up in the log, not on stderr
+                    logger.warn("MatchException when trying to match " + firstObject.getTitleCache(), e);
+                } catch (MergeException e) {
+                    logger.warn("MergeException when trying to merge " + firstObject.getTitleCache(), e);
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Counts the objects of the given class selected by the title query.
+     * The call is passed straight on to the dao.
+     *
+     * @param clazz the class of the objects to count
+     * @param queryString the title query string
+     * @param matchmode the match mode applied to the query string
+     * @param criteria additional criteria handed to the dao
+     * @return the count reported by the dao
+     */
+    @Transactional(readOnly = true)
+    @Override
+    public Integer countByTitle(Class<? extends T> clazz, String queryString,MatchMode matchmode, List<Criterion> criteria){
+        return dao.countByTitle(clazz, queryString, matchmode, criteria);
+    }
+
+ @Transactional(readOnly = true)
+ @Override
+ public Integer countByTitle(IIdentifiableEntityServiceConfigurator<T> config){
+ return countByTitle(config.getClazz(), config.getTitleSearchStringSqlized(),
+ config.getMatchMode(), config.getCriteria());
+
+ }
+
+    /**
+     * Finds objects by identifier and returns them as a pager of DTOs.
+     * <p>
+     * The dao is first asked for the total count; only if that count is greater
+     * than zero the actual records are loaded. Each dao row is turned into one DTO:
+     * with <code>includeEntity</code> the third column is used as the entity itself,
+     * otherwise the third and fourth column are used as a UUID and a String.
+     *
+     * @param clazz the class of the objects to search for
+     * @param identifier the identifier value to search for
+     * @param identifierType the identifier type to restrict the search to
+     * @param matchmode the match mode applied to the identifier
+     * @param includeEntity whether the DTOs carry the full entity
+     * @param pageSize the page size handed to the dao and the pager
+     * @param pageNumber the page number handed to the dao and the pager
+     * @param propertyPaths the property paths handed to the dao
+     * @return a pager holding the DTOs of the requested page and the total count
+     */
+    @Override
+    @Transactional(readOnly = true)
+    public <S extends T> Pager<FindByIdentifierDTO<S>> findByIdentifier(
+            Class<S> clazz, String identifier, DefinedTerm identifierType, MatchMode matchmode,
+            boolean includeEntity, Integer pageSize,
+            Integer pageNumber, List<String> propertyPaths) {
+
+        Integer numberOfResults = dao.countByIdentifier(clazz, identifier, identifierType, matchmode);
+        List<FindByIdentifierDTO<S>> dtos = new ArrayList<FindByIdentifierDTO<S>>();
+
+        if (numberOfResults > 0) { // nothing to load otherwise
+            List<Object[]> rows = dao.findByIdentifier(clazz, identifier, identifierType,
+                    matchmode, includeEntity, pageSize, pageNumber, propertyPaths);
+            for (Object[] row : rows){
+                FindByIdentifierDTO<S> dto;
+                if (includeEntity){
+                    dto = new FindByIdentifierDTO<S>((DefinedTerm)row[0], (String)row[1], (S)row[2]);
+                }else{
+                    dto = new FindByIdentifierDTO<S>((DefinedTerm)row[0], (String)row[1], (UUID)row[2], (String)row[3]);
+                }
+                dtos.add(dto);
+            }
+        }
+
+        return new DefaultPagerImpl<FindByIdentifierDTO<S>>(pageNumber, numberOfResults, pageSize, dtos);
+    }
+
+
+}