Project

General

Profile

« Previous | Next » 

Revision 9355fbbb

Added by Andreas Müller over 4 years ago

ref #1444 fix ResultSetPartitioner for imports with >1 ID columns

View differences:

cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/ResultSetPartitioner.java
87 87
	 */
88 88
	private int[] currentIdListType;
89 89

  
90
	private String lastPartitionHighestIDs;
91

  
92
	boolean nextAlreadyCalled = false;
93

  
90 94
	/**
91 95
	 * counter for the partitions
92 96
	 */
93 97
	private int currentPartition;
94 98

  
99

  
100
	/**
101
	 * counter for all records
102
	 */
103
	private int allRecords;
104

  
95 105
	/**
96 106
	 * number of records in the current partition
97 107
	 */
......
175 185
		ResultSetMetaData metaData = idResultSet.getMetaData();
176 186
		int nOfIdColumns = metaData.getColumnCount();
177 187
		currentPartition++;
178
		currentIdLists = new ArrayList[nOfIdColumns];
188

  
189
		currentIdLists = new List[nOfIdColumns];
179 190
		currentIdListType = new int[nOfIdColumns];
180 191

  
181 192
		for (int col = 0; col< currentIdLists.length; col++){
182 193
			currentIdLists[col] = new ArrayList<>();
183 194
			currentIdListType[col] = metaData.getColumnType(col + 1);
184 195
		}
185
		List<String> currentIdList;
186 196

  
187 197
		int i = 0;
188 198
		//for each record
189
		for (i = 0; i < partitionSize; i++){
190
			if (idResultSet.next() == false){
191
				break;
199
		for (i = 0; i < partitionSize || !firstIdIsNew(); i++){
200
			if( !nextAlreadyCalled){
201
			    if (!idResultSet.next()){
202
			        break;
203
			    }
204
			}else if (idResultSet.isAfterLast()){
205
			    break;
192 206
			}
207
			nextAlreadyCalled = false;
208
		    allRecords++;
193 209
			//for each column
194 210
			for (int colIndex = 0; colIndex < nOfIdColumns; colIndex++){
195
				Object oNextId = idResultSet.getObject(colIndex + 1);
196
				String strNextId = String.valueOf(oNextId);
197
				currentIdList = currentIdLists[colIndex];
211

  
212
			    String strNextId = String.valueOf(idResultSet.getObject(colIndex + 1));
213
				List<String> currentIdList = currentIdLists[colIndex];
198 214
				currentIdList.add(strNextId);
215
				if(colIndex == 0){
216
				    lastPartitionHighestIDs = strNextId;
217
				}
199 218
			}
200 219
			result = true; //true if at least one record was read
201 220
		}
......
204 223
		return result;
205 224
	}
206 225

  
207

  
208

  
209
	/**
226
    /**
227
     * Checks if the current partition may have duplicates that were handled already.
228
     * This may happen if the result set has >1 columns and if the first column does
229
     * not change its value with the first record but with a later record and the
230
     * following columns jump back with their values.
231
     * E.g. first result set contains (x1=1,x2=3) and with the second result set
232
     * we ask for x1 in (1,2) x2 in (1-5, 10-15) where the 1-5 comes from x1=2.
233
     * This should not happen and therefore we increase the partition a bit such that
234
     * the first column always changes its value and therefore it is guaranteed that
235
     * such duplicates will never exist.
236
     * The reason for this problem is, that we do not create tuples in the WHERE clause
237
     * of getIdRecord but we handle the range for each column separately. This is not correct
238
     * but handling of tuples is more difficult in SQL.
239
     * As multiple columns do not appear so often this workaround seems acceptable.
240
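     * @return <code>true</code> if the result set has no further record or if the first column of the next record differs from the last first-column value read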
241
     * @throws SQLException
242
     */
243
    private boolean firstIdIsNew() throws SQLException {
244
        String last = lastPartitionHighestIDs;
245
        if (!idResultSet.next()){
246
            return true;
247
        }
248
        nextAlreadyCalled = true;
249
        String current = String.valueOf(idResultSet.getObject(1));
250
        return !current.equals(last);
251
    }
252

  
253
    /**
210 254
	 * Returns the underlying resultSet holding all records needed to handle the partition.<BR>
211 255
	 * @return
212 256
	 */
......
266 310
	 *
267 311
	 */
268 312
	private int getCurrentNumberOfRows() {
269
		return ((currentPartition - 1) * partitionSize + rowsInCurrentPartition);
313
	    return allRecords;
314
//		return ((currentPartition - 1) * partitionSize + rowsInCurrentPartition);
270 315
	}
271 316

  
272 317

  

Also available in: Unified diff