From 2cbe894f0b870ca0e47cb66077ba122190832a56 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andreas=20M=C3=BCller?= Date: Tue, 21 Dec 2010 09:26:36 +0000 Subject: [PATCH] bugfix for partitioned deduplicate --- .../api/service/IdentifiableServiceBase.java | 86 +++++++++++++------ 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentifiableServiceBase.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentifiableServiceBase.java index c7dfdced1e..eb9e1acc7f 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentifiableServiceBase.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentifiableServiceBase.java @@ -248,12 +248,24 @@ public abstract class IdentifiableServiceBase clazz, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) { + DeduplicateState dedupState = new DeduplicateState(); + if (clazz == null){ logger.warn("Deduplication clazz must not be null!"); return 0; @@ -266,39 +278,65 @@ public abstract class IdentifiableServiceBase nextGroup = new ArrayList(); - String lastTitleCache = null; - Number countPagesN = Math.ceil(countTotal/pageSize.doubleValue()) ; - int countPages = countPagesN.intValue(); + int result = 0; +// double countTotal = count(clazz); +// +// Number countPagesN = Math.ceil(countTotal/dedupState.pageSize.doubleValue()) ; +// int countPages = countPagesN.intValue(); +// - //TODO test paging - for (int i = 0; i< countPages ; i++){ - List orderHints = Arrays.asList(new OrderHint[]{new OrderHint("titleCache", SortOrder.ASCENDING)}); - List objectList = listByTitle(clazz, null, null, null, pageSize, i, orderHints, null); + List orderHints = Arrays.asList(new OrderHint[]{new OrderHint("titleCache", SortOrder.ASCENDING)}); - for (T object : objectList){ - String currentTitleCache = object.getTitleCache(); - if (currentTitleCache != null && currentTitleCache.equals(lastTitleCache)){ - //=titleCache - nextGroup.add(object); - }else{ - //<> titleCache - result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy); - nextGroup = new ArrayList(); - nextGroup.add(object); - } - lastTitleCache = currentTitleCache; - } + while (! dedupState.isCompleted){ + //get x page sizes + List objectList = getPages(clazz, dedupState, orderHints); + //after each page check if any changes took place + int nUnEqualPages = handleAllPages(objectList, dedupState, nextGroup, matchStrategy, mergeStrategy); + nUnEqualPages = nUnEqualPages + dedupState.pageSize * dedupState.startPage; + //refresh start page counter + int finishedPages = nUnEqualPages / dedupState.pageSize; + dedupState.startPage = finishedPages; } + result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy); return result; } + + private int handleAllPages(List objectList, DeduplicateState dedupState, List nextGroup, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) { + int nUnEqual = 0; + for (T object : objectList){ + String currentTitleCache = object.getTitleCache(); + if (currentTitleCache != null && currentTitleCache.equals(dedupState.lastTitleCache)){ + //=titleCache + nextGroup.add(object); + }else{ + //<> titleCache + dedupState.result += handleLastGroup(nextGroup, matchStrategy, mergeStrategy); + nextGroup = new ArrayList(); + nextGroup.add(object); + nUnEqual++; + } + dedupState.lastTitleCache = currentTitleCache; + } + handleLastGroup(nextGroup, matchStrategy, mergeStrategy); + return nUnEqual; + } + + private List getPages(Class clazz, DeduplicateState dedupState, List orderHints) { + List result = new ArrayList(); + for (int pageNo = dedupState.startPage; pageNo < dedupState.startPage + dedupState.nPages; pageNo++){ + List objectList = listByTitle(clazz, null, null, null, dedupState.pageSize, pageNo, orderHints, null); + result.addAll(objectList); + } + if (result.size()< dedupState.nPages * dedupState.pageSize ){ + dedupState.isCompleted = true; + } + return result; + } + private int handleLastGroup(List group, IMatchStrategy matchStrategy, IMergeStrategy mergeStrategy) { int result = 0; int size = group.size(); -- 2.34.1