1 |
5dd8e8b0
|
Andreas Müller
|
/**
|
2 |
|
|
* Copyright (C) 2007 EDIT
|
3 |
|
|
* European Distributed Institute of Taxonomy
|
4 |
|
|
* http://www.e-taxonomy.eu
|
5 |
|
|
*
|
6 |
|
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7 |
|
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8 |
|
|
*/
|
9 |
|
|
|
10 |
|
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
11 |
|
|
|
12 |
|
|
import java.sql.ResultSet;
|
13 |
|
|
import java.sql.SQLException;
|
14 |
|
|
import java.util.ArrayList;
|
15 |
|
|
import java.util.HashMap;
|
16 |
|
|
import java.util.HashSet;
|
17 |
|
|
import java.util.List;
|
18 |
|
|
import java.util.Map;
|
19 |
|
|
import java.util.Set;
|
20 |
|
|
|
21 |
|
|
import org.apache.log4j.Logger;
|
22 |
|
|
import org.springframework.stereotype.Component;
|
23 |
|
|
|
24 |
|
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25 |
|
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
|
26 |
|
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
27 |
|
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
28 |
|
|
import eu.etaxonomy.cdm.io.common.Source;
|
29 |
|
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
30 |
|
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
31 |
|
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
32 |
|
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
33 |
|
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
34 |
|
|
import eu.etaxonomy.cdm.model.name.NonViralName;
|
35 |
|
|
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
|
36 |
|
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
/**
|
40 |
|
|
* @author a.mueller
|
41 |
|
|
* @created 20.03.2008
|
42 |
|
|
*/
|
43 |
|
|
@Component
|
44 |
|
|
public class BerlinModelOccurrenceSourceImport extends BerlinModelImportBase {
|
45 |
|
|
private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);

/** Number of handled records between two progress log messages in {@code doPartition}. */
private static final int modCount = 5000;
/** Human readable plural name of the imported records, passed to the super constructor. */
private static final String pluralString = "occurrence sources";
/** Name of the source database table, passed to the super constructor. */
private static final String dbTableName = "emOccurrenceSource";  //??

/**
 * Maps a source number to a reference id. It is created at the
 * beginning of {@code doInvoke} and set back to <code>null</code> afterwards.
 */
private Map<String, Integer> sourceNumberRefIdMap;
/** Source numbers for which no reference could be found during the current invocation. */
private Set<String> unfoundReferences = new HashSet<String>();

/**
 * Creates the import and registers the table name and the plural
 * string with the base class.
 */
public BerlinModelOccurrenceSourceImport(){
    super(dbTableName, pluralString);
}
|
59 |
|
|
|
60 |
|
|
@Override
|
61 |
|
|
protected String getIdQuery(BerlinModelImportState state) {
|
62 |
|
|
String result = "SELECT occurrenceSourceId FROM " + getTableName();
|
63 |
|
|
if (state.getConfig().getOccurrenceSourceFilter() != null){
|
64 |
|
|
result += " WHERE " + state.getConfig().getOccurrenceSourceFilter();
|
65 |
|
|
}
|
66 |
|
|
return result;
|
67 |
|
|
}
|
68 |
|
|
|
69 |
|
|
@Override
|
70 |
|
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
71 |
|
|
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
|
72 |
|
|
" SELECT * " +
|
73 |
|
|
" FROM emOccurrenceSource " +
|
74 |
|
|
" WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ") )" +
|
75 |
|
|
"";
|
76 |
|
|
return strQuery;
|
77 |
|
|
}
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
@Override
|
82 |
|
|
protected void doInvoke(BerlinModelImportState state) {
|
83 |
|
|
unfoundReferences = new HashSet<String>();
|
84 |
|
|
|
85 |
|
|
try {
|
86 |
|
|
sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
|
87 |
|
|
} catch (SQLException e) {
|
88 |
|
|
e.printStackTrace();
|
89 |
|
|
throw new RuntimeException(e);
|
90 |
|
|
}
|
91 |
|
|
super.doInvoke(state);
|
92 |
|
|
sourceNumberRefIdMap = null;
|
93 |
|
|
if (unfoundReferences.size()>0){
|
94 |
|
|
String unfound = "'" + CdmUtils.concat("','", unfoundReferences.toArray(new String[]{})) + "'";
|
95 |
|
|
logger.warn("Not found references: " + unfound);
|
96 |
|
|
}
|
97 |
|
|
return;
|
98 |
|
|
}
|
99 |
|
|
|
100 |
|
|
@Override
|
101 |
|
|
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
102 |
|
|
boolean success = true;
|
103 |
|
|
ResultSet rs = partitioner.getResultSet();
|
104 |
|
|
Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
|
105 |
|
|
|
106 |
|
|
Set<DescriptionElementBase> objectsToSave = new HashSet<DescriptionElementBase>();
|
107 |
|
|
try {
|
108 |
|
|
int i = 0;
|
109 |
|
|
//for each reference
|
110 |
|
|
while (rs.next()){
|
111 |
|
|
|
112 |
|
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
|
113 |
|
|
|
114 |
|
|
Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
|
115 |
|
|
Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
|
116 |
|
|
String sourceNumber = rs.getString("SourceNumber");
|
117 |
|
|
String oldName = rs.getString("OldName");
|
118 |
|
|
Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
|
119 |
|
|
|
120 |
|
|
Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
|
121 |
|
|
|
122 |
|
|
if (distribution == null){
|
123 |
|
|
//distribution = duplicateMap.get(occurrenceFk);
|
124 |
|
|
}
|
125 |
|
|
if (distribution != null){
|
126 |
|
|
Integer refId = sourceNumberRefIdMap.get(sourceNumber);
|
127 |
|
|
Reference<?> ref = refMap.get(String.valueOf(refId));
|
128 |
|
|
|
129 |
|
|
if (ref != null){
|
130 |
|
|
DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
|
131 |
|
|
originalSource.setCitation(ref);
|
132 |
|
|
TaxonNameBase<?, ?> taxonName;
|
133 |
|
|
taxonName = getName(state, oldName, oldNameFk);
|
134 |
|
|
if (taxonName != null){
|
135 |
|
|
originalSource.setNameUsedInSource(taxonName);
|
136 |
|
|
}else if(isNotBlank(oldName)){
|
137 |
|
|
originalSource.setOriginalNameString(oldName);
|
138 |
|
|
}
|
139 |
|
|
distribution.addSource(originalSource);
|
140 |
|
|
}else{
|
141 |
|
|
logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
|
142 |
|
|
unfoundReferences.add(sourceNumber);
|
143 |
|
|
}
|
144 |
|
|
}else{
|
145 |
|
|
logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
|
146 |
|
|
}
|
147 |
|
|
|
148 |
|
|
}
|
149 |
|
|
logger.info("Distributions to save: " + objectsToSave.size());
|
150 |
|
|
getDescriptionService().saveDescriptionElement(objectsToSave);
|
151 |
|
|
|
152 |
|
|
return success;
|
153 |
|
|
} catch (SQLException e) {
|
154 |
|
|
logger.error("SQLException:" + e);
|
155 |
|
|
return false;
|
156 |
|
|
}
|
157 |
|
|
}
|
158 |
|
|
|
159 |
|
|
|
160 |
|
|
@Override
|
161 |
7c778a7e
|
Andreas Müller
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
|
162 |
5dd8e8b0
|
Andreas Müller
|
String nameSpace;
|
163 |
7c778a7e
|
Andreas Müller
|
Class<?> cdmClass;
|
164 |
5dd8e8b0
|
Andreas Müller
|
Set<String> idSet;
|
165 |
|
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
166 |
|
|
|
167 |
|
|
try{
|
168 |
|
|
Set<String> occurrenceIdSet = new HashSet<String>();
|
169 |
|
|
Set<String> referenceIdSet = new HashSet<String>();
|
170 |
|
|
Set<String> nameIdSet = new HashSet<String>();
|
171 |
|
|
Set<String> sourceNumberSet = new HashSet<String>();
|
172 |
|
|
while (rs.next()){
|
173 |
|
|
handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
|
174 |
|
|
handleForeignKey(rs, nameIdSet, "oldNameFk");
|
175 |
|
|
sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
|
176 |
|
|
}
|
177 |
|
|
|
178 |
|
|
sourceNumberSet.remove("");
|
179 |
|
|
referenceIdSet = handleSourceNumber(rs, sourceNumberSet, result);
|
180 |
|
|
|
181 |
|
|
|
182 |
|
|
//occurrence map
|
183 |
|
|
nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
|
184 |
|
|
cdmClass = Distribution.class;
|
185 |
|
|
idSet = occurrenceIdSet;
|
186 |
|
|
Map<String, Distribution> occurrenceMap = (Map<String, Distribution>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
187 |
|
|
result.put(nameSpace, occurrenceMap);
|
188 |
|
|
|
189 |
|
|
//name map
|
190 |
|
|
nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
|
191 |
|
|
cdmClass = TaxonNameBase.class;
|
192 |
|
|
idSet =nameIdSet;
|
193 |
|
|
Map<String, TaxonNameBase> nameMap = (Map<String, TaxonNameBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
194 |
|
|
result.put(nameSpace, nameMap);
|
195 |
|
|
|
196 |
|
|
//reference map
|
197 |
|
|
nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
|
198 |
|
|
cdmClass = Reference.class;
|
199 |
|
|
idSet = referenceIdSet;
|
200 |
|
|
Map<String, Reference> referenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
201 |
|
|
result.put(nameSpace, referenceMap);
|
202 |
|
|
|
203 |
|
|
} catch (SQLException e) {
|
204 |
|
|
throw new RuntimeException(e);
|
205 |
|
|
}
|
206 |
|
|
return result;
|
207 |
|
|
}
|
208 |
|
|
|
209 |
|
|
private Set<String> handleSourceNumber(ResultSet rs, Set<String> sourceNumberSet, Map<Object, Map<String, ? extends CdmBase>> result) {
|
210 |
|
|
Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
|
211 |
|
|
Set<String> referenceIdSet = new HashSet<String>();
|
212 |
|
|
|
213 |
|
|
for(String sourceNumber : sourceNumberSet){
|
214 |
|
|
Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
|
215 |
|
|
referenceIdSet.add(String.valueOf(refId));
|
216 |
|
|
}
|
217 |
|
|
return referenceIdSet;
|
218 |
|
|
}
|
219 |
|
|
|
220 |
|
|
|
221 |
|
|
|
222 |
|
|
/**
|
223 |
|
|
* @param state
|
224 |
|
|
* @param oldName
|
225 |
|
|
* @param oldNameFk
|
226 |
|
|
* @return
|
227 |
|
|
*/
|
228 |
|
|
boolean isFirstTimeNoNameByService = true;
|
229 |
|
|
private TaxonNameBase<?, ?> getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
|
230 |
|
|
TaxonNameBase<?,?> taxonName = (TaxonNameBase)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
|
231 |
|
|
if (taxonName == null && oldName != null){
|
232 |
|
|
if (isFirstTimeNoNameByService){
|
233 |
|
|
logger.warn("oldName not checked against names in BerlinModel. Just take it as a string");
|
234 |
|
|
isFirstTimeNoNameByService = false;
|
235 |
|
|
}
|
236 |
|
|
List<NonViralName> names = new ArrayList<NonViralName>();
|
237 |
|
|
// names = getNameService().getNamesByNameCache(oldName);
|
238 |
|
|
if (names.size() == 1){
|
239 |
|
|
return names.get(0);
|
240 |
|
|
}else {
|
241 |
|
|
if (names.size()> 2){
|
242 |
|
|
logger.info("There is more than one name matching oldName: " + oldName + ".");
|
243 |
|
|
}
|
244 |
|
|
return null;
|
245 |
|
|
//taxonName = nameParser.parseSimpleName(oldName);
|
246 |
|
|
}
|
247 |
|
|
}
|
248 |
|
|
return taxonName;
|
249 |
|
|
}
|
250 |
|
|
|
251 |
|
|
/**
|
252 |
|
|
* Creates a map which maps source numbers on references
|
253 |
|
|
* @param state
|
254 |
|
|
* @return
|
255 |
|
|
* @throws SQLException
|
256 |
|
|
*/
|
257 |
|
|
private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
|
258 |
|
|
Map<String, Integer> result = new HashMap<String, Integer>();
|
259 |
|
|
|
260 |
|
|
Source source = state.getConfig().getSource();
|
261 |
|
|
String strQuery = " SELECT RefId, IdInSource " +
|
262 |
|
|
" FROM Reference " +
|
263 |
|
|
" WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
|
264 |
|
|
|
265 |
|
|
ResultSet rs = source.getResultSet(strQuery) ;
|
266 |
|
|
while (rs.next()){
|
267 |
|
|
int refId = rs.getInt("RefId");
|
268 |
|
|
String idInSource = rs.getString("IdInSource");
|
269 |
|
|
if (idInSource != null){
|
270 |
|
|
String[] singleSources = idInSource.split("\\|");
|
271 |
|
|
for (String singleSource : singleSources){
|
272 |
|
|
singleSource = singleSource.trim();
|
273 |
|
|
result.put(singleSource, refId);
|
274 |
|
|
}
|
275 |
|
|
}
|
276 |
|
|
}
|
277 |
|
|
return result;
|
278 |
|
|
}
|
279 |
|
|
|
280 |
|
|
@Override
|
281 |
|
|
protected boolean doCheck(BerlinModelImportState state){
|
282 |
|
|
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
|
283 |
|
|
return validator.validate(state);
|
284 |
|
|
}
|
285 |
|
|
|
286 |
|
|
@Override
|
287 |
|
|
protected boolean isIgnore(BerlinModelImportState state){
|
288 |
|
|
if (! state.getConfig().isDoOccurrence()){
|
289 |
|
|
return true;
|
290 |
|
|
}else{
|
291 |
|
|
if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
|
292 |
|
|
logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
|
293 |
|
|
return true;
|
294 |
|
|
}else{
|
295 |
|
|
return false;
|
296 |
|
|
}
|
297 |
|
|
}
|
298 |
|
|
}
|
299 |
|
|
|
300 |
|
|
}
|