Revision 9355fbbb
Added by Andreas Müller over 4 years ago
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/ResultSetPartitioner.java | ||
---|---|---|
87 | 87 |
*/ |
88 | 88 |
private int[] currentIdListType; |
89 | 89 |
|
90 |
private String lastPartitionHighestIDs; |
|
91 |
|
|
92 |
boolean nextAlreadyCalled = false; |
|
93 |
|
|
90 | 94 |
/** |
91 | 95 |
* counter for the partitions |
92 | 96 |
*/ |
93 | 97 |
private int currentPartition; |
94 | 98 |
|
99 |
|
|
100 |
/** |
|
101 |
* counter for all records |
|
102 |
*/ |
|
103 |
private int allRecords; |
|
104 |
|
|
95 | 105 |
/** |
96 | 106 |
* number of records in the current partition |
97 | 107 |
*/ |
... | ... | |
175 | 185 |
ResultSetMetaData metaData = idResultSet.getMetaData(); |
176 | 186 |
int nOfIdColumns = metaData.getColumnCount(); |
177 | 187 |
currentPartition++; |
178 |
currentIdLists = new ArrayList[nOfIdColumns]; |
|
188 |
|
|
189 |
currentIdLists = new List[nOfIdColumns]; |
|
179 | 190 |
currentIdListType = new int[nOfIdColumns]; |
180 | 191 |
|
181 | 192 |
for (int col = 0; col< currentIdLists.length; col++){ |
182 | 193 |
currentIdLists[col] = new ArrayList<>(); |
183 | 194 |
currentIdListType[col] = metaData.getColumnType(col + 1); |
184 | 195 |
} |
185 |
List<String> currentIdList; |
|
186 | 196 |
|
187 | 197 |
int i = 0; |
188 | 198 |
//for each record |
189 |
for (i = 0; i < partitionSize; i++){ |
|
190 |
if (idResultSet.next() == false){ |
|
191 |
break; |
|
199 |
for (i = 0; i < partitionSize || !firstIdIsNew(); i++){ |
|
200 |
if( !nextAlreadyCalled){ |
|
201 |
if (!idResultSet.next()){ |
|
202 |
break; |
|
203 |
} |
|
204 |
}else if (idResultSet.isAfterLast()){ |
|
205 |
break; |
|
192 | 206 |
} |
207 |
nextAlreadyCalled = false; |
|
208 |
allRecords++; |
|
193 | 209 |
//for each column |
194 | 210 |
for (int colIndex = 0; colIndex < nOfIdColumns; colIndex++){ |
195 |
Object oNextId = idResultSet.getObject(colIndex + 1); |
|
196 |
String strNextId = String.valueOf(oNextId);
|
|
197 |
currentIdList = currentIdLists[colIndex]; |
|
211 |
|
|
212 |
String strNextId = String.valueOf(idResultSet.getObject(colIndex + 1));
|
|
213 |
List<String> currentIdList = currentIdLists[colIndex];
|
|
198 | 214 |
currentIdList.add(strNextId); |
215 |
if(colIndex == 0){ |
|
216 |
lastPartitionHighestIDs = strNextId; |
|
217 |
} |
|
199 | 218 |
} |
200 | 219 |
result = true; //true if at least one record was read |
201 | 220 |
} |
... | ... | |
204 | 223 |
return result; |
205 | 224 |
} |
206 | 225 |
|
207 |
|
|
208 |
|
|
209 |
/** |
|
226 |
/** |
|
227 |
* Checks if the current partition may have duplicates that were handled already. |
|
228 |
* This may happen if the result set has >1 columns and if the first column does |
|
229 |
* not change its value with the first record but with a later record and the |
|
230 |
* following columns jump back with their values. |
|
231 |
* E.g. first result set contains (x1=1,x2=3) and with the second result set |
|
232 |
* we ask for x1 in (1,2) and x2 in (1-5, 10-15) where the 1-5 comes from x1=2. |
|
233 |
* This should not happen and therefore we increase the partition a bit such that |
|
234 |
* the first column always changes its value and therefore it is guaranteed that |
|
235 |
* such duplicates will never exist. |
|
236 |
* The reason for this problem is, that we do not create tuples in the WHERE clause |
|
237 |
* of getIdRecord but we handle the range for each column separately. This is not correct |
|
238 |
* but handling of tuples is more difficult in SQL. |
|
239 |
* As multiple columns do not appear so often this workaround seems acceptable. |
|
240 |
* @return |
|
241 |
* @throws SQLException |
|
242 |
*/ |
|
243 |
private boolean firstIdIsNew() throws SQLException { |
|
244 |
String last = lastPartitionHighestIDs; |
|
245 |
if (!idResultSet.next()){ |
|
246 |
return true; |
|
247 |
} |
|
248 |
nextAlreadyCalled = true; |
|
249 |
String current = String.valueOf(idResultSet.getObject(1)); |
|
250 |
return !current.equals(last); |
|
251 |
} |
|
252 |
|
|
253 |
/** |
|
210 | 254 |
* Returns the underlying resultSet holding all records needed to handle the partition.<BR> |
211 | 255 |
* @return |
212 | 256 |
*/ |
... | ... | |
266 | 310 |
* |
267 | 311 |
*/ |
268 | 312 |
private int getCurrentNumberOfRows() { |
269 |
return ((currentPartition - 1) * partitionSize + rowsInCurrentPartition); |
|
313 |
return allRecords; |
|
314 |
// return ((currentPartition - 1) * partitionSize + rowsInCurrentPartition); |
|
270 | 315 |
} |
271 | 316 |
|
272 | 317 |
|
Also available in: Unified diff
ref #1444 fix ResultSetPartitioner for imports with >1 ID columns