1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
|
26
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
27
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
28
|
import eu.etaxonomy.cdm.io.common.Source;
|
29
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
30
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
31
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
32
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
33
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
34
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
35
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
36
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
37
|
|
38
|
|
39
|
/**
|
40
|
* @author a.mueller
|
41
|
* @created 20.03.2008
|
42
|
*/
|
43
|
@Component
|
44
|
public class BerlinModelOccurrenceSourceImport extends BerlinModelImportBase {
|
45
|
private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);
|
46
|
|
47
|
private static int modCount = 5000;
|
48
|
private static final String pluralString = "occurrence sources";
|
49
|
private static final String dbTableName = "emOccurrenceSource"; //??
|
50
|
|
51
|
|
52
|
private Map<String, Integer> sourceNumberRefIdMap;
|
53
|
private Set<String> unfoundReferences = new HashSet<String>();
|
54
|
|
55
|
|
56
|
/**
 * Creates the import and hands the table name ({@link #dbTableName}) and the
 * label ({@link #pluralString}) to the base class.
 */
public BerlinModelOccurrenceSourceImport(){
    super(dbTableName, pluralString);
}
|
59
|
|
60
|
@Override
|
61
|
protected String getIdQuery(BerlinModelImportState state) {
|
62
|
String result = "SELECT occurrenceSourceId FROM " + getTableName();
|
63
|
if (state.getConfig().getOccurrenceSourceFilter() != null){
|
64
|
result += " WHERE " + state.getConfig().getOccurrenceSourceFilter();
|
65
|
}
|
66
|
return result;
|
67
|
}
|
68
|
|
69
|
@Override
|
70
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
71
|
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
|
72
|
" SELECT * " +
|
73
|
" FROM emOccurrenceSource " +
|
74
|
" WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ") )" +
|
75
|
"";
|
76
|
return strQuery;
|
77
|
}
|
78
|
|
79
|
|
80
|
|
81
|
@Override
|
82
|
protected void doInvoke(BerlinModelImportState state) {
|
83
|
unfoundReferences = new HashSet<String>();
|
84
|
|
85
|
try {
|
86
|
sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
|
87
|
} catch (SQLException e) {
|
88
|
e.printStackTrace();
|
89
|
throw new RuntimeException(e);
|
90
|
}
|
91
|
super.doInvoke(state);
|
92
|
sourceNumberRefIdMap = null;
|
93
|
if (unfoundReferences.size()>0){
|
94
|
String unfound = "'" + CdmUtils.concat("','", unfoundReferences.toArray(new String[]{})) + "'";
|
95
|
logger.warn("Not found references: " + unfound);
|
96
|
}
|
97
|
return;
|
98
|
}
|
99
|
|
100
|
@Override
|
101
|
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
102
|
boolean success = true;
|
103
|
ResultSet rs = partitioner.getResultSet();
|
104
|
Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
|
105
|
|
106
|
Set<DescriptionElementBase> objectsToSave = new HashSet<DescriptionElementBase>();
|
107
|
try {
|
108
|
int i = 0;
|
109
|
//for each reference
|
110
|
while (rs.next()){
|
111
|
|
112
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
|
113
|
|
114
|
Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
|
115
|
Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
|
116
|
String sourceNumber = rs.getString("SourceNumber");
|
117
|
String oldName = rs.getString("OldName");
|
118
|
Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
|
119
|
|
120
|
Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
|
121
|
|
122
|
if (distribution == null){
|
123
|
//distribution = duplicateMap.get(occurrenceFk);
|
124
|
}
|
125
|
if (distribution != null){
|
126
|
Integer refId = sourceNumberRefIdMap.get(sourceNumber);
|
127
|
Reference ref = refMap.get(String.valueOf(refId));
|
128
|
|
129
|
if (ref != null){
|
130
|
DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
|
131
|
originalSource.setCitation(ref);
|
132
|
TaxonName taxonName;
|
133
|
taxonName = TaxonName.castAndDeproxy(getName(state, oldName, oldNameFk));
|
134
|
if (taxonName != null){
|
135
|
originalSource.setNameUsedInSource(taxonName);
|
136
|
}else if(isNotBlank(oldName)){
|
137
|
originalSource.setOriginalNameString(oldName);
|
138
|
}
|
139
|
distribution.addSource(originalSource);
|
140
|
}else{
|
141
|
logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
|
142
|
unfoundReferences.add(sourceNumber);
|
143
|
}
|
144
|
}else{
|
145
|
logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
|
146
|
}
|
147
|
|
148
|
}
|
149
|
logger.info("Distributions to save: " + objectsToSave.size());
|
150
|
getDescriptionService().saveDescriptionElement(objectsToSave);
|
151
|
|
152
|
return success;
|
153
|
} catch (SQLException e) {
|
154
|
logger.error("SQLException:" + e);
|
155
|
return false;
|
156
|
}
|
157
|
}
|
158
|
|
159
|
|
160
|
@Override
|
161
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
|
162
|
String nameSpace;
|
163
|
Class<?> cdmClass;
|
164
|
Set<String> idSet;
|
165
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
166
|
|
167
|
try{
|
168
|
Set<String> occurrenceIdSet = new HashSet<String>();
|
169
|
Set<String> referenceIdSet = new HashSet<String>();
|
170
|
Set<String> nameIdSet = new HashSet<String>();
|
171
|
Set<String> sourceNumberSet = new HashSet<String>();
|
172
|
while (rs.next()){
|
173
|
handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
|
174
|
handleForeignKey(rs, nameIdSet, "oldNameFk");
|
175
|
sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
|
176
|
}
|
177
|
|
178
|
sourceNumberSet.remove("");
|
179
|
referenceIdSet = handleSourceNumber(rs, sourceNumberSet, result);
|
180
|
|
181
|
|
182
|
//occurrence map
|
183
|
nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
|
184
|
cdmClass = Distribution.class;
|
185
|
idSet = occurrenceIdSet;
|
186
|
Map<String, Distribution> occurrenceMap = (Map<String, Distribution>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
187
|
result.put(nameSpace, occurrenceMap);
|
188
|
|
189
|
//name map
|
190
|
nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
|
191
|
cdmClass = TaxonName.class;
|
192
|
idSet =nameIdSet;
|
193
|
Map<String, TaxonName> nameMap = (Map<String, TaxonName>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
194
|
result.put(nameSpace, nameMap);
|
195
|
|
196
|
//reference map
|
197
|
nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
|
198
|
cdmClass = Reference.class;
|
199
|
idSet = referenceIdSet;
|
200
|
Map<String, Reference> referenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
201
|
result.put(nameSpace, referenceMap);
|
202
|
|
203
|
} catch (SQLException e) {
|
204
|
throw new RuntimeException(e);
|
205
|
}
|
206
|
return result;
|
207
|
}
|
208
|
|
209
|
private Set<String> handleSourceNumber(ResultSet rs, Set<String> sourceNumberSet, Map<Object, Map<String, ? extends CdmBase>> result) {
|
210
|
Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
|
211
|
Set<String> referenceIdSet = new HashSet<String>();
|
212
|
|
213
|
for(String sourceNumber : sourceNumberSet){
|
214
|
Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
|
215
|
referenceIdSet.add(String.valueOf(refId));
|
216
|
}
|
217
|
return referenceIdSet;
|
218
|
}
|
219
|
|
220
|
|
221
|
|
222
|
/**
|
223
|
* @param state
|
224
|
* @param oldName
|
225
|
* @param oldNameFk
|
226
|
* @return
|
227
|
*/
|
228
|
boolean isFirstTimeNoNameByService = true;
|
229
|
private INonViralName getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
|
230
|
TaxonName taxonName = (TaxonName)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
|
231
|
if (taxonName == null && oldName != null){
|
232
|
if (isFirstTimeNoNameByService){
|
233
|
logger.warn("oldName not checked against names in BerlinModel. Just take it as a string");
|
234
|
isFirstTimeNoNameByService = false;
|
235
|
}
|
236
|
List<INonViralName> names = new ArrayList<>();
|
237
|
// names = getNameService().getNamesByNameCache(oldName);
|
238
|
if (names.size() == 1){
|
239
|
return names.get(0);
|
240
|
}else {
|
241
|
if (names.size()> 2){
|
242
|
logger.info("There is more than one name matching oldName: " + oldName + ".");
|
243
|
}
|
244
|
return null;
|
245
|
//taxonName = nameParser.parseSimpleName(oldName);
|
246
|
}
|
247
|
}
|
248
|
return taxonName;
|
249
|
}
|
250
|
|
251
|
/**
|
252
|
* Creates a map which maps source numbers on references
|
253
|
* @param state
|
254
|
* @return
|
255
|
* @throws SQLException
|
256
|
*/
|
257
|
private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
|
258
|
Map<String, Integer> result = new HashMap<String, Integer>();
|
259
|
|
260
|
Source source = state.getConfig().getSource();
|
261
|
String strQuery = " SELECT RefId, IdInSource " +
|
262
|
" FROM Reference " +
|
263
|
" WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
|
264
|
|
265
|
ResultSet rs = source.getResultSet(strQuery) ;
|
266
|
while (rs.next()){
|
267
|
int refId = rs.getInt("RefId");
|
268
|
String idInSource = rs.getString("IdInSource");
|
269
|
if (idInSource != null){
|
270
|
String[] singleSources = idInSource.split("\\|");
|
271
|
for (String singleSource : singleSources){
|
272
|
singleSource = singleSource.trim();
|
273
|
result.put(singleSource, refId);
|
274
|
}
|
275
|
}
|
276
|
}
|
277
|
return result;
|
278
|
}
|
279
|
|
280
|
@Override
|
281
|
protected boolean doCheck(BerlinModelImportState state){
|
282
|
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
|
283
|
return validator.validate(state);
|
284
|
}
|
285
|
|
286
|
@Override
|
287
|
protected boolean isIgnore(BerlinModelImportState state){
|
288
|
if (! state.getConfig().isDoOccurrence()){
|
289
|
return true;
|
290
|
}else{
|
291
|
if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
|
292
|
logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
|
293
|
return true;
|
294
|
}else{
|
295
|
return false;
|
296
|
}
|
297
|
}
|
298
|
}
|
299
|
|
300
|
}
|