update factory methods for original sources #1549
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelOccurrenceSourceImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20
21 import org.apache.log4j.Logger;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
26 import eu.etaxonomy.cdm.io.common.IOValidator;
27 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
28 import eu.etaxonomy.cdm.io.common.Source;
29 import eu.etaxonomy.cdm.model.common.CdmBase;
30 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
31 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
32 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
33 import eu.etaxonomy.cdm.model.description.Distribution;
34 import eu.etaxonomy.cdm.model.name.NonViralName;
35 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
36 import eu.etaxonomy.cdm.model.reference.Reference;
37
38
39 /**
40 * @author a.mueller
41 * @created 20.03.2008
42 * @version 1.0
43 */
44 @Component
45 public class BerlinModelOccurrenceSourceImport extends BerlinModelImportBase {
46 private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);
47
48 private static int modCount = 5000;
49 private static final String pluralString = "occurrence sources";
50 private static final String dbTableName = "emOccurrenceSource"; //??
51
52
53 private Map<String, Integer> sourceNumberRefIdMap;
54 private Set<String> unfoundReferences = new HashSet<String>();
55
56
/**
 * Creates the import and hands the source table name and the plural label
 * of the imported objects to the base class.
 */
public BerlinModelOccurrenceSourceImport() {
    super(dbTableName, pluralString);
}
60
61 /* (non-Javadoc)
62 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
63 */
64 @Override
65 protected String getIdQuery(BerlinModelImportState state) {
66 String result = "SELECT occurrenceSourceId FROM " + getTableName();
67 if (state.getConfig().getOccurrenceSourceFilter() != null){
68 result += " WHERE " + state.getConfig().getOccurrenceSourceFilter();
69 }
70 return result;
71 }
72
73 /* (non-Javadoc)
74 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
75 */
76 @Override
77 protected String getRecordQuery(BerlinModelImportConfigurator config) {
78 String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
79 " SELECT * " +
80 " FROM emOccurrenceSource " +
81 " WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ") )" +
82 "";
83 return strQuery;
84 }
85
86
87
88 @Override
89 protected void doInvoke(BerlinModelImportState state) {
90 unfoundReferences = new HashSet<String>();
91
92 try {
93 sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
94 } catch (SQLException e) {
95 e.printStackTrace();
96 throw new RuntimeException(e);
97 }
98 super.doInvoke(state);
99 sourceNumberRefIdMap = null;
100 if (unfoundReferences.size()>0){
101 String unfound = "'" + CdmUtils.concat("','", unfoundReferences.toArray(new String[]{})) + "'";
102 logger.warn("Not found references: " + unfound);
103 }
104 return;
105 }
106
107 /* (non-Javadoc)
108 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
109 */
110 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
111 boolean success = true;
112 ResultSet rs = partitioner.getResultSet();
113
114 Set<DescriptionElementBase> objectsToSave = new HashSet<DescriptionElementBase>();
115 try {
116 int i = 0;
117 //for each reference
118 while (rs.next()){
119
120 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
121
122 Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
123 Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
124 String sourceNumber = rs.getString("SourceNumber");
125 String oldName = rs.getString("OldName");
126 Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
127
128 Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
129
130 if (distribution == null){
131 //distribution = duplicateMap.get(occurrenceFk);
132 }
133 if (distribution != null){
134 Integer refId = sourceNumberRefIdMap.get(sourceNumber);
135 Reference<?> ref = getReference(refId, state);
136
137 if (ref != null){
138 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
139 originalSource.setCitation(ref);
140 TaxonNameBase<?, ?> taxonName;
141 taxonName = getName(state, oldName, oldNameFk);
142 if (taxonName != null){
143 originalSource.setNameUsedInSource(taxonName);
144 }else if(isNotBlank(oldName)){
145 originalSource.setOriginalNameString(oldName);
146 }
147 distribution.addSource(originalSource);
148 }else{
149 logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
150 unfoundReferences.add(sourceNumber);
151 }
152 }else{
153 logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
154 }
155
156 }
157 logger.info("Distributions to save: " + objectsToSave.size());
158 getDescriptionService().saveDescriptionElement(objectsToSave);
159
160 return success;
161 } catch (SQLException e) {
162 logger.error("SQLException:" + e);
163 return false;
164 }
165 }
166
167
168 /* (non-Javadoc)
169 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
170 */
171 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
172 String nameSpace;
173 Class cdmClass;
174 Set<String> idSet;
175 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
176
177 try{
178 Set<String> occurrenceIdSet = new HashSet<String>();
179 Set<String> referenceIdSet = new HashSet<String>();
180 Set<String> nameIdSet = new HashSet<String>();
181 Set<String> sourceNumberSet = new HashSet<String>();
182 while (rs.next()){
183 handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
184 handleForeignKey(rs, nameIdSet, "oldNameFk");
185 sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
186 }
187
188 sourceNumberSet.remove("");
189 referenceIdSet = handleSourceNumber(rs, sourceNumberSet, result);
190
191
192 //occurrence map
193 nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
194 cdmClass = Distribution.class;
195 idSet = occurrenceIdSet;
196 Map<String, Distribution> occurrenceMap = (Map<String, Distribution>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
197 result.put(nameSpace, occurrenceMap);
198
199 //name map
200 nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
201 cdmClass = TaxonNameBase.class;
202 idSet =nameIdSet;
203 Map<String, TaxonNameBase> nameMap = (Map<String, TaxonNameBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
204 result.put(nameSpace, nameMap);
205
206 //nom reference map
207 nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
208 cdmClass = Reference.class;
209 idSet = referenceIdSet;
210 Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
211 result.put(nameSpace, nomReferenceMap);
212
213 //biblio reference map
214 nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
215 cdmClass = Reference.class;
216 idSet = referenceIdSet;
217 Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
218 result.put(nameSpace, biblioReferenceMap);
219
220
221 } catch (SQLException e) {
222 throw new RuntimeException(e);
223 }
224 return result;
225 }
226
227 private Set<String> handleSourceNumber(ResultSet rs, Set<String> sourceNumberSet, Map<Object, Map<String, ? extends CdmBase>> result) {
228 Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
229 Set<String> referenceIdSet = new HashSet<String>();
230
231 for(String sourceNumber : sourceNumberSet){
232 Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
233 referenceIdSet.add(String.valueOf(refId));
234 }
235 return referenceIdSet;
236 }
237
238
239
240 /**
241 * @param state
242 * @param oldName
243 * @param oldNameFk
244 * @return
245 */
246 boolean isFirstTimeNoNameByService = true;
247 private TaxonNameBase<?, ?> getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
248 TaxonNameBase<?,?> taxonName = (TaxonNameBase)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
249 if (taxonName == null && oldName != null){
250 if (isFirstTimeNoNameByService){
251 logger.warn("oldName not checked against names in BerlinModel. Just take it as a string");
252 isFirstTimeNoNameByService = false;
253 }
254 List<NonViralName> names = new ArrayList<NonViralName>();
255 // names = getNameService().getNamesByNameCache(oldName);
256 if (names.size() == 1){
257 return names.get(0);
258 }else {
259 if (names.size()> 2){
260 logger.info("There is more than one name matching oldName: " + oldName + ".");
261 }
262 return null;
263 //taxonName = nameParser.parseSimpleName(oldName);
264 }
265 }
266 return taxonName;
267 }
268
269 /**
270 * Creates a map which maps source numbers on references
271 * @param state
272 * @return
273 * @throws SQLException
274 */
275 private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
276 Map<String, Integer> result = new HashMap<String, Integer>();
277
278 Source source = state.getConfig().getSource();
279 String strQuery = " SELECT RefId, IdInSource " +
280 " FROM Reference " +
281 " WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
282
283 ResultSet rs = source.getResultSet(strQuery) ;
284 while (rs.next()){
285 int refId = rs.getInt("RefId");
286 String idInSource = rs.getString("IdInSource");
287 if (idInSource != null){
288 String[] singleSources = idInSource.split("\\|");
289 for (String singleSource : singleSources){
290 singleSource = singleSource.trim();
291 result.put(singleSource, refId);
292 }
293 }
294 }
295 return result;
296 }
297
298
299
300 private Reference getReference(Integer refId, BerlinModelImportState state) {
301 Reference<?> ref = (Reference)state.getRelatedObject(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE, String.valueOf(refId));
302 if (ref == null){
303 ref = (Reference)state.getRelatedObject(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE, String.valueOf(refId));;
304 }
305 return ref;
306 }
307
308
309
310
311 /* (non-Javadoc)
312 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
313 */
314 @Override
315 protected boolean doCheck(BerlinModelImportState state){
316 IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
317 return validator.validate(state);
318 }
319
320 /* (non-Javadoc)
321 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
322 */
323 protected boolean isIgnore(BerlinModelImportState state){
324 if (! state.getConfig().isDoOccurrence()){
325 return true;
326 }else{
327 if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
328 logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
329 return true;
330 }else{
331 return false;
332 }
333 }
334 }
335
336 }