Project

General

Profile

Download (10.7 KB) Statistics
| Branch: | Revision:
1
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.berlinModel.in;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceSourceImportValidator;
26
import eu.etaxonomy.cdm.io.common.IOValidator;
27
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
28
import eu.etaxonomy.cdm.io.common.Source;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
31
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
33
import eu.etaxonomy.cdm.model.description.Distribution;
34
import eu.etaxonomy.cdm.model.name.NonViralName;
35
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37

    
38

    
39
/**
 * @author a.mueller
 * @created 20.03.2008
 */
43
@Component
44
public class BerlinModelOccurrenceSourceImport  extends BerlinModelImportBase {
45
	private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceSourceImport.class);
46

    
47
	private static int modCount = 5000;
48
	private static final String pluralString = "occurrence sources";
49
	private static final String dbTableName = "emOccurrenceSource";  //??
50
	
51
	
52
	private Map<String, Integer> sourceNumberRefIdMap;
53
	private Set<String> unfoundReferences = new HashSet<String>();
54
	
55

    
56
	/**
	 * Creates the import and passes the source table name and the plural
	 * label of the imported records to the base class.
	 */
	public BerlinModelOccurrenceSourceImport() {
		super(dbTableName, pluralString);
	}
59
	
60
	@Override
61
	protected String getIdQuery(BerlinModelImportState state) {
62
		String result = "SELECT occurrenceSourceId FROM " + getTableName();
63
		if (state.getConfig().getOccurrenceSourceFilter() != null){
64
			result += " WHERE " +  state.getConfig().getOccurrenceSourceFilter();
65
		}
66
		return result;
67
	}
68

    
69
	@Override
70
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
71
			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution 
72
            " SELECT * " + 
73
                " FROM emOccurrenceSource " +  
74
            " WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN + ")  )" +  
75
             "";
76
		return strQuery;
77
	}
78
	
79
	
80

    
81
	@Override
82
	protected void doInvoke(BerlinModelImportState state) {
83
		unfoundReferences = new HashSet<String>();
84
		
85
		try {
86
			sourceNumberRefIdMap = makeSourceNumberReferenceIdMap(state);
87
		} catch (SQLException e) {
88
			e.printStackTrace();
89
			throw new RuntimeException(e);
90
		}
91
		super.doInvoke(state);
92
		sourceNumberRefIdMap = null;
93
		if (unfoundReferences.size()>0){
94
			String unfound = "'" + CdmUtils.concat("','", unfoundReferences.toArray(new String[]{})) + "'"; 
95
			logger.warn("Not found references: " + unfound);
96
		}
97
		return;
98
	}
99

    
100
	@Override
101
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
102
		boolean success = true;
103
		ResultSet rs = partitioner.getResultSet();
104
		Map<String, Reference> refMap = partitioner.getObjectMap(BerlinModelReferenceImport.REFERENCE_NAMESPACE);
105
		
106
		Set<DescriptionElementBase> objectsToSave = new HashSet<DescriptionElementBase>();
107
		try {
108
			int i = 0;
109
			//for each reference
110
            while (rs.next()){
111
                
112
                if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("occurrence sources handled: " + (i-1));}
113
                
114
                Integer occurrenceSourceId = rs.getInt("OccurrenceSourceId");
115
                Integer occurrenceFk =nullSafeInt(rs, "OccurrenceFk");
116
    			String sourceNumber = rs.getString("SourceNumber");
117
    			String oldName = rs.getString("OldName");
118
    			Integer oldNameFk = nullSafeInt(rs, "OldNameFk");
119
    			
120
    			Distribution distribution = (Distribution)state.getRelatedObject(BerlinModelOccurrenceImport.NAMESPACE, String.valueOf(occurrenceFk));
121
                
122
    			if (distribution == null){
123
    				//distribution = duplicateMap.get(occurrenceFk);
124
    			}
125
    			if (distribution != null){
126
    				Integer refId = sourceNumberRefIdMap.get(sourceNumber);
127
    				Reference<?> ref = refMap.get(String.valueOf(refId));
128

    
129
    				if (ref != null){
130
    					DescriptionElementSource originalSource = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
131
    					originalSource.setCitation(ref);
132
    					TaxonNameBase<?, ?> taxonName;
133
						taxonName = getName(state, oldName, oldNameFk);
134
						if (taxonName != null){
135
    						originalSource.setNameUsedInSource(taxonName);
136
    					}else if(isNotBlank(oldName)){
137
    						originalSource.setOriginalNameString(oldName);
138
    					}
139
    					distribution.addSource(originalSource);
140
    				}else{
141
    					logger.warn("reference for sourceNumber "+sourceNumber+" could not be found. OccurrenceSourceId: " + occurrenceSourceId );
142
    					unfoundReferences.add(sourceNumber);
143
    				}
144
    			}else{
145
    				logger.warn("distribution ("+occurrenceFk+") for occurrence source (" + occurrenceSourceId + ") could not be found." );
146
    			}
147
                
148
            }
149
			logger.info("Distributions to save: " + objectsToSave.size());
150
			getDescriptionService().saveDescriptionElement(objectsToSave);	
151
			
152
			return success;
153
		} catch (SQLException e) {
154
			logger.error("SQLException:" +  e);
155
			return false;
156
		}
157
	}
158

    
159

    
160
	@Override
161
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
162
		String nameSpace;
163
		Class<?> cdmClass;
164
		Set<String> idSet;
165
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
166
		
167
		try{
168
			Set<String> occurrenceIdSet = new HashSet<String>();
169
			Set<String> referenceIdSet = new HashSet<String>();
170
			Set<String> nameIdSet = new HashSet<String>();
171
			Set<String> sourceNumberSet = new HashSet<String>();
172
			while (rs.next()){
173
				handleForeignKey(rs, occurrenceIdSet, "occurrenceFk");
174
				handleForeignKey(rs, nameIdSet, "oldNameFk");
175
				sourceNumberSet.add(CdmUtils.NzTrim(rs.getString("SourceNumber")));
176
			}
177
			
178
			sourceNumberSet.remove("");
179
			referenceIdSet = handleSourceNumber(rs, sourceNumberSet, result);
180
			
181
			
182
			//occurrence map
183
			nameSpace = BerlinModelOccurrenceImport.NAMESPACE;
184
			cdmClass = Distribution.class;
185
			idSet = occurrenceIdSet;
186
			Map<String, Distribution> occurrenceMap = (Map<String, Distribution>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
187
			result.put(nameSpace, occurrenceMap);
188

    
189
			//name map
190
			nameSpace = BerlinModelTaxonNameImport.NAMESPACE;
191
			cdmClass = TaxonNameBase.class;
192
			idSet =nameIdSet;
193
			Map<String, TaxonNameBase> nameMap = (Map<String, TaxonNameBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
194
			result.put(nameSpace, nameMap);
195
			
196
			//reference map
197
			nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
198
			cdmClass = Reference.class;
199
			idSet = referenceIdSet;
200
			Map<String, Reference> referenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
201
			result.put(nameSpace, referenceMap);
202

    
203
		} catch (SQLException e) {
204
			throw new RuntimeException(e);
205
		}
206
		return result;
207
	}
208

    
209
	private Set<String> handleSourceNumber(ResultSet rs, Set<String> sourceNumberSet, Map<Object, Map<String, ? extends CdmBase>> result) {
210
		Map<String, Integer> sourceNumberReferenceIdMap = this.sourceNumberRefIdMap;
211
		Set<String> referenceIdSet = new HashSet<String>();
212
		
213
		for(String sourceNumber : sourceNumberSet){
214
			Integer refId = sourceNumberReferenceIdMap.get(sourceNumber);
215
			referenceIdSet.add(String.valueOf(refId));		
216
		}
217
		return referenceIdSet;
218
	}
219

    
220
	
221
	
222
	/**
223
	 * @param state
224
	 * @param oldName
225
	 * @param oldNameFk
226
	 * @return
227
	 */
228
	boolean isFirstTimeNoNameByService = true;
229
	private TaxonNameBase<?, ?> getName(BerlinModelImportState state, String oldName, Integer oldNameFk) {
230
		TaxonNameBase<?,?> taxonName = (TaxonNameBase)state.getRelatedObject(BerlinModelTaxonNameImport.NAMESPACE, String.valueOf(oldNameFk));
231
		if (taxonName == null && oldName != null){
232
			if (isFirstTimeNoNameByService){
233
				logger.warn("oldName not checked against names in BerlinModel. Just take it as a string");
234
				isFirstTimeNoNameByService = false;
235
			}
236
			List<NonViralName> names = new ArrayList<NonViralName>();
237
//			names = getNameService().getNamesByNameCache(oldName);
238
			if (names.size() == 1){
239
				return names.get(0);
240
			}else {
241
				if (names.size()> 2){
242
					logger.info("There is more than one name matching oldName: " + oldName + ".");
243
				}
244
				return null;
245
				//taxonName = nameParser.parseSimpleName(oldName);
246
			}
247
		}
248
		return taxonName;
249
	}
250

    
251
	/**
252
	 * Creates a map which maps source numbers on references
253
	 * @param state
254
	 * @return
255
     * @throws SQLException 
256
	 */
257
	private Map<String, Integer> makeSourceNumberReferenceIdMap(BerlinModelImportState state) throws SQLException {
258
		Map<String, Integer> result = new HashMap<String, Integer>();
259
		
260
		Source source = state.getConfig().getSource();
261
		String strQuery = " SELECT RefId, IdInSource " +  
262
						  " FROM Reference " + 
263
						  " WHERE     (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
264
		
265
		ResultSet rs = source.getResultSet(strQuery) ;
266
		while (rs.next()){
267
			int refId = rs.getInt("RefId");
268
			String idInSource = rs.getString("IdInSource");
269
			if (idInSource != null){
270
				String[] singleSources = idInSource.split("\\|");
271
				for (String singleSource : singleSources){
272
					singleSource = singleSource.trim();
273
					result.put(singleSource, refId);
274
				}
275
			}
276
		}
277
		return result;
278
	}
279

    
280
	@Override
281
	protected boolean doCheck(BerlinModelImportState state){
282
		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceSourceImportValidator();
283
		return validator.validate(state);
284
	}
285

    
286
	@Override
287
	protected boolean isIgnore(BerlinModelImportState state){
288
		if (! state.getConfig().isDoOccurrence()){
289
			return true;
290
		}else{
291
			if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
292
				logger.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");
293
				return true;
294
			}else{
295
				return false;
296
			}
297
		}
298
	}
299
	
300
}
(11-11/21)