1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
|
26
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
27
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
28
|
import eu.etaxonomy.cdm.io.common.Source;
|
29
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
30
|
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
|
31
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
32
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
33
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
34
|
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
|
35
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
36
|
|
37
|
|
38
|
/**
|
39
|
* @author a.mueller
|
40
|
* @created 20.03.2008
|
41
|
* @version 1.0
|
42
|
*/
|
43
|
@Component
|
44
|
public class BerlinModelOccurrenceSourceImport extends BerlinModelImportBase {
|
45
|
private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);
|
46
|
|
47
|
private static int modCount = 5000;
|
48
|
private static final String pluralString = "occurrence sources";
|
49
|
private static final String dbTableName = "emOccurrenceSource"; //??
|
50
|
|
51
|
private Map<String, Integer> sourceNumberRefIdMap;
|
52
|
|
53
|
|
54
|
public BerlinModelOccurrenceSourceImport(){
|
55
|
super();
|
56
|
}
|
57
|
|
58
|
/* (non-Javadoc)
|
59
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
|
60
|
*/
|
61
|
@Override
|
62
|
protected String getIdQuery() {
|
63
|
return " SELECT occurrenceSourceId FROM " + getTableName();
|
64
|
}
|
65
|
|
66
|
/* (non-Javadoc)
|
67
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
68
|
*/
|
69
|
@Override
|
70
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
71
|
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
|
72
|
" SELECT * " +
|
73
|
" FROM emOccurrenceSource " +
|
74
|
" WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ") )" +
|
75
|
"";
|
76
|
return strQuery;
|
77
|
}
|
78
|
|
79
|
|
80
|
|
81
|
@Override
|
82
|
protected void doInvoke(BerlinModelImportState state) {
|
83
|
try {
|
84
|
sourceNumberRefIdMap = makeSourceNameReferenceIdMap(state);
|
85
|
} catch (SQLException e) {
|
86
|
e.printStackTrace();
|
87
|
throw new RuntimeException(e);
|
88
|
}
|
89
|
super.doInvoke(state);
|
90
|
sourceNumberRefIdMap = null;
|
91
|
return;
|
92
|
}
|
93
|
|
94
|
/* (non-Javadoc)
|
95
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
|
96
|
*/
|
97
|
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
98
|
boolean success = true;
|
99
|
ResultSet rs = partitioner.getResultSet();
|
100
|
|
101
|
Set<DescriptionElementBase> objectsToSave = new HashSet<DescriptionElementBase>();
|
102
|
try {
|
103
|
int i = 0;
|
104
|
//for each reference
|
105
|
while (rs.next()){
|
106
|
|
107
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
|
108
|
|
109
|
Integer occurrenceFk = (Integer)rs.getObject("OccurrenceFk");
|
110
|
String sourceNumber = rs.getString("SourceNumber");
|
111
|
String oldName = rs.getString("OldName");
|
112
|
Integer oldNameFk = (Integer)rs.getObject("OldNameFk");
|
113
|
|
114
|
Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
|
115
|
|
116
|
if (distribution == null){
|
117
|
//distribution = duplicateMap.get(occurrenceFk);
|
118
|
}
|
119
|
if (distribution != null){
|
120
|
Integer refId = sourceNumberRefIdMap.get(sourceNumber);
|
121
|
Reference ref = getReference(refId, state);
|
122
|
|
123
|
if (ref != null){
|
124
|
DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
|
125
|
originalSource.setCitation(ref);
|
126
|
TaxonNameBase<?, ?> taxonName;
|
127
|
taxonName = getName(state, oldName, oldNameFk);
|
128
|
if (taxonName != null){
|
129
|
originalSource.setNameUsedInSource(taxonName);
|
130
|
}else if(CdmUtils.isNotEmpty(oldName)){
|
131
|
originalSource.setOriginalNameString(oldName);
|
132
|
}
|
133
|
distribution.addSource(originalSource);
|
134
|
}else{
|
135
|
logger.warn("reference for sourceNumber "+sourceNumber+" could not be found." );
|
136
|
}
|
137
|
}else{
|
138
|
logger.warn("distribution ("+occurrenceFk+") could not be found." );
|
139
|
}
|
140
|
|
141
|
}
|
142
|
logger.info("Distributions to save: " + objectsToSave.size());
|
143
|
getDescriptionService().saveDescriptionElement(objectsToSave);
|
144
|
|
145
|
return success;
|
146
|
} catch (SQLException e) {
|
147
|
logger.error("SQLException:" + e);
|
148
|
return false;
|
149
|
}
|
150
|
}
|
151
|
|
152
|
|
153
|
/* (non-Javadoc)
|
154
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
155
|
*/
|
156
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
157
|
String nameSpace;
|
158
|
Class cdmClass;
|
159
|
Set<String> idSet;
|
160
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
161
|
|
162
|
try{
|
163
|
Set<String> occurrenceIdSet = new HashSet<String>();
|
164
|
Set<String> referenceIdSet = new HashSet<String>();
|
165
|
Set<String> nameIdSet = new HashSet<String>();
|
166
|
Set<String> sourceNumberSet = new HashSet<String>();
|
167
|
while (rs.next()){
|
168
|
handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
|
169
|
handleForeignKey(rs, nameIdSet, "oldNameFk");
|
170
|
sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
|
171
|
}
|
172
|
|
173
|
sourceNumberSet.remove("");
|
174
|
referenceIdSet = handleSourceNumber(rs, sourceNumberSet, result);
|
175
|
|
176
|
|
177
|
//occurrence map
|
178
|
nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
|
179
|
cdmClass = Distribution.class;
|
180
|
idSet = occurrenceIdSet;
|
181
|
Map<String, Distribution> occurrenceMap = (Map<String, Distribution>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
182
|
result.put(nameSpace, occurrenceMap);
|
183
|
|
184
|
//name map
|
185
|
nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
|
186
|
cdmClass = TaxonNameBase.class;
|
187
|
idSet =nameIdSet;
|
188
|
Map<String, TaxonNameBase> nameMap = (Map<String, TaxonNameBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
189
|
result.put(nameSpace, nameMap);
|
190
|
|
191
|
//nom reference map
|
192
|
nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
|
193
|
cdmClass = Reference.class;
|
194
|
idSet = referenceIdSet;
|
195
|
Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
196
|
result.put(nameSpace, nomReferenceMap);
|
197
|
|
198
|
//biblio reference map
|
199
|
nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
|
200
|
cdmClass = Reference.class;
|
201
|
idSet = referenceIdSet;
|
202
|
Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
203
|
result.put(nameSpace, biblioReferenceMap);
|
204
|
|
205
|
|
206
|
} catch (SQLException e) {
|
207
|
throw new RuntimeException(e);
|
208
|
}
|
209
|
return result;
|
210
|
}
|
211
|
|
212
|
private Set<String> handleSourceNumber(ResultSet rs, Set<String> sourceNumberSet, Map<Object, Map<String, ? extends CdmBase>> result) {
|
213
|
Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
|
214
|
Set<String> referenceIdSet = new HashSet<String>();
|
215
|
|
216
|
for(String sourceNumber : sourceNumberSet){
|
217
|
Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
|
218
|
referenceIdSet.add(String.valueOf(refId));
|
219
|
}
|
220
|
return referenceIdSet;
|
221
|
}
|
222
|
|
223
|
|
224
|
|
225
|
/**
|
226
|
* @param state
|
227
|
* @param oldName
|
228
|
* @param oldNameFk
|
229
|
* @return
|
230
|
*/
|
231
|
boolean isFirstTimeNoNameByService = true;
|
232
|
private TaxonNameBase<?, ?> getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
|
233
|
TaxonNameBase<?,?> taxonName = (TaxonNameBase)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
|
234
|
if (taxonName == null && oldName != null){
|
235
|
if (isFirstTimeNoNameByService){
|
236
|
logger.warn("oldName not checked against names in BerlinModel. Just take it as a string");
|
237
|
isFirstTimeNoNameByService = false;
|
238
|
}
|
239
|
List<NonViralName> names = new ArrayList<NonViralName>();
|
240
|
// names = getNameService().getNamesByNameCache(oldName);
|
241
|
if (names.size() == 1){
|
242
|
return names.get(0);
|
243
|
}else {
|
244
|
if (names.size()> 2){
|
245
|
logger.info("There is more than one name matching oldName: " + oldName + ".");
|
246
|
}
|
247
|
return null;
|
248
|
//taxonName = nameParser.parseSimpleName(oldName);
|
249
|
}
|
250
|
}
|
251
|
return taxonName;
|
252
|
}
|
253
|
|
254
|
/**
|
255
|
* Creates a map which maps source numbers on references
|
256
|
* @param state
|
257
|
* @return
|
258
|
* @throws SQLException
|
259
|
*/
|
260
|
private Map<String, Integer> makeSourceNameReferenceIdMap(BerlinModelImportState state) throws SQLException {
|
261
|
Map<String, Integer> result = new HashMap<String, Integer>();
|
262
|
|
263
|
Source source = state.getConfig().getSource();
|
264
|
String strQuery = " SELECT RefId, IdInSource " +
|
265
|
" FROM Reference " +
|
266
|
" WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
|
267
|
|
268
|
ResultSet rs = source.getResultSet(strQuery) ;
|
269
|
while (rs.next()){
|
270
|
int refId = rs.getInt("RefId");
|
271
|
String idInSource = rs.getString("IdInSource");
|
272
|
if (idInSource != null){
|
273
|
String[] singleSources = idInSource.split("\\|");
|
274
|
for (String singleSource : singleSources){
|
275
|
singleSource = singleSource.trim();
|
276
|
result.put(singleSource, refId);
|
277
|
}
|
278
|
}
|
279
|
}
|
280
|
return result;
|
281
|
}
|
282
|
|
283
|
|
284
|
|
285
|
private Reference getReference(Integer refId, BerlinModelImportState state) {
|
286
|
Reference<?> ref = (Reference)state.getRelatedObject(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE, String.valueOf(refId));
|
287
|
if (ref == null){
|
288
|
ref = (Reference)state.getRelatedObject(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE, String.valueOf(refId));;
|
289
|
}
|
290
|
return ref;
|
291
|
}
|
292
|
|
293
|
|
294
|
|
295
|
|
296
|
/* (non-Javadoc)
|
297
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
|
298
|
*/
|
299
|
@Override
|
300
|
protected boolean doCheck(BerlinModelImportState state){
|
301
|
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
|
302
|
return validator.validate(state);
|
303
|
}
|
304
|
|
305
|
/* (non-Javadoc)
|
306
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
|
307
|
*/
|
308
|
@Override
|
309
|
protected String getTableName() {
|
310
|
return dbTableName;
|
311
|
}
|
312
|
|
313
|
/* (non-Javadoc)
|
314
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
|
315
|
*/
|
316
|
@Override
|
317
|
public String getPluralString() {
|
318
|
return pluralString;
|
319
|
}
|
320
|
|
321
|
/* (non-Javadoc)
|
322
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
323
|
*/
|
324
|
protected boolean isIgnore(BerlinModelImportState state){
|
325
|
return ! state.getConfig().isDoOccurrence();
|
326
|
}
|
327
|
|
328
|
}
|