Project

General

Profile

Download (14.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.UTF8;
25
import eu.etaxonomy.cdm.io.common.IOValidator;
26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.Language;
30
import eu.etaxonomy.cdm.model.description.Distribution;
31
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.location.NamedArea;
34
import eu.etaxonomy.cdm.model.name.Rank;
35
import eu.etaxonomy.cdm.model.name.ZoologicalName;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37
import eu.etaxonomy.cdm.model.taxon.Classification;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41

    
42

    
43
/**
44
 * @author a.mueller
45
 * @created 20.02.2010
46
 */
47
@Component
48
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
49
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** Number of handled records after which a progress message is written to the log. */
	private static final int modCount = 10000;
	/** Label for the imported objects; used in log messages and handed to the base class. */
	private static final String pluralString = "current taxa";
	/** Name of the source table the records are read from. */
	private static final String dbTableName = "current_species";
	/** Target class handed to the base class (marked as "not needed" by the original author). */
	private static final Class<?> cdmTargetClass = Taxon.class;

	/**
	 * Creates the import and passes the label, the source table name and the
	 * target class to the base class constructor.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
59

    
60
	@Override
61
	protected String getIdQuery() {
62
		String strRecordQuery = 
63
			" SELECT IDcurrentspec " + 
64
			" FROM " + dbTableName; 
65
		return strRecordQuery;	
66
	}
67

    
68
	@Override
69
	protected String getRecordQuery(GlobisImportConfigurator config) {
70
		String strRecordQuery = 
71
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
72
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
73
			" FROM " + getTableName() + " cs " +
74
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
75
		return strRecordQuery;
76
	}
77
	
78
	@Override
79
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
80
		boolean success = true;
81
		
82
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
83
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
84
		ResultSet rs = partitioner.getResultSet();
85

    
86
		Classification classification = getClassification(state);
87
		
88
		try {
89
			
90
			int i = 0;
91

    
92
			//for each reference
93
            while (rs.next()){
94
                
95
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
96
				
97
        		Integer taxonId = rs.getInt("IDcurrentspec");
98
        		
99
        		//String dtSpcJahr -> ignore !
100
        		//empty: fiSpcLiteratur
101
        		
102
        		//TODO
103
        		//fiSpcspcgrptax
104
        		
105
				try {
106
					
107
					//source ref
108
					Reference<?> sourceRef = state.getTransactionalSourceReference();
109
					Taxon nextHigherTaxon = null;
110
					
111
					boolean hasNewParent = false; //true if any parent is new
112
					
113
					//species
114
					Taxon species = createObject(rs, state, taxonId);
115
					
116
					
117
					String familyStr = rs.getString("dtSpcFamakt");
118
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
119
					String tribeStr = rs.getString("dtSpcTribakt");
120
					
121
					//family
122
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
123
					
124
					//subfamily
125
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
126
					Taxon subFamilyParent = getParent(subFamily, classification);
127
					if (subFamilyParent != null){
128
						if (! compareTaxa(family, subFamilyParent)){
129
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
130
						}
131
					}else{
132
						classification.addParentChild(family, subFamily, sourceRef, null);
133
					}
134
					nextHigherTaxon = subFamily;
135
					
136
					//tribe
137
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
138
					if (tribe != null){
139
						Taxon tribeParent = getParent(tribe, classification);
140
						if (tribeParent != null){
141
							if (! compareTaxa(subFamily, tribeParent)){
142
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
143
							}
144
						}else{
145
							classification.addParentChild(subFamily, tribe, sourceRef, null);
146
						}
147
						nextHigherTaxon = tribe;
148
					}					
149

    
150
					
151
					//genus
152
					String genusStr = rs.getString("dtSpcGenusakt");
153
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
154
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
155
					Taxon genusParent = getParent(genus, classification);
156
					
157
					if (genusParent != null){
158
						if (! compareTaxa(genusParent, nextHigherTaxon)){
159
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
160
						}
161
					}else{
162
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
163
					}
164
					nextHigherTaxon = genus;
165
					
166
					//subgenus
167
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
168
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
169
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
170
					if (hasSubgenus){
171
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
172
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
173
						nextHigherTaxon = subGenus;
174
					}
175
					
176
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
177
					
178
					handleCountries(state, rs, species, taxonId);
179
					
180
					//common names -> not used anymore
181
					handleCommonNames(state, rs, species);
182
					
183
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
184
					
185
					objectsToSave.add(species); 
186
					
187

    
188
				} catch (Exception e) {
189
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
190
					e.printStackTrace();
191
				} 
192
                
193
            }
194

    
195
			logger.warn(pluralString + " to save: " + objectsToSave.size());
196
			getTaxonService().save(objectsToSave);	
197
			
198
			return success;
199
		} catch (SQLException e) {
200
			logger.error("SQLException:" +  e);
201
			return false;
202
		}
203
	}
204

    
205
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species, Integer taxonId) throws SQLException {
206
		String countriesStr = rs.getString("dtSpcCountries");
207
		if (isBlank(countriesStr)){
208
			return;
209
		}
210
		String[] countriesSplit = countriesStr.split(";");
211
		for (String countryStr : countriesSplit){
212
			if (isBlank(countryStr)){
213
				continue;
214
			}
215
			countryStr = countryStr.trim();
216
			
217
			//TODO use isComplete
218
			boolean isComplete = countryStr.endsWith(".");
219
			if (isComplete){
220
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
221
			}
222
			boolean isDoubtful = countryStr.endsWith("[?]");
223
			if (isDoubtful){
224
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
225
			}
226
			if (countryStr.startsWith("?")){
227
				isDoubtful = true;
228
				countryStr = countryStr.substring(1).trim();
229
			}
230
			
231
			
232
			
233
			countryStr = normalizeCountry(countryStr);
234
			
235
			NamedArea country = getCountry(state, countryStr);
236
			
237
			PresenceAbsenceTerm status;
238
			if (isDoubtful){
239
				status = PresenceAbsenceTerm.PRESENT_DOUBTFULLY();
240
			}else{
241
				status = PresenceAbsenceTerm.PRESENT();
242
			}
243
			
244
			if (country != null){
245
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
246
				Distribution distribution = Distribution.NewInstance(country, status);
247
				desc.addElement(distribution);
248
			}else{
249
				if (countryStr.length() > 0){
250
					logger.warn("Country string not recognized : " + countryStr + " for IDcurrentspec " + taxonId);
251
				}
252
			}
253
		}
254
	}
255

    
256

    
257

    
258
	/**
259
	 * @param countryStr
260
	 * @return
261
	 */
262
	private String normalizeCountry(String countryStr) {
263
		String result = countryStr.trim();
264
		if (result.endsWith(".")){
265
			result = result.substring(0,result.length() - 1);
266
		}
267
		while (result.startsWith(UTF8.NO_BREAK_SPACE.toString())){
268
			result = result.substring(1);  //
269
		}
270
		if (result.matches("\\s+")){
271
			result = "";
272
		}
273
		return result.trim(); 
274
	}
275
	
276
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
277
		//DON't use, use seperate common name tables instead
278
		
279
//		String commonNamesStr = rs.getString("vernacularnames");
280
//		if (isBlank(commonNamesStr)){
281
//			return;
282
//		}
283
//		String[] commonNamesSplit = commonNamesStr.split(";");
284
//		for (String commonNameStr : commonNamesSplit){
285
//			if (isBlank(commonNameStr)){
286
//				continue;
287
//			}
288
//			Language language = null; //TODO
289
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
290
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
291
//			desc.addElement(commonName);
292
//		}
293
	}
294

    
295

    
296

    
297

    
298
	/**
299
	 * Compares 2 taxa, returns true of both taxa look similar
300
	 * @param genus
301
	 * @param nextHigherTaxon
302
	 * @return
303
	 */
304
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
305
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
306
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
307
		if (!name1.getRank().equals(name2.getRank())){
308
			return false;
309
		}
310
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
311
			return false;
312
		}
313
		return true;
314
	}
315

    
316

    
317

    
318

    
319
	private Taxon getParent(Taxon child, Classification classification) {
320
		if (child == null){
321
			logger.warn("Child is null");
322
			return null;
323
		}
324
		for (TaxonNode node :  child.getTaxonNodes()){
325
			if (node.getClassification().equals(classification)){
326
				if (node.getParent() != null){
327
					return node.getParent().getTaxon();	
328
				}else{
329
					return null;
330
				}
331
			}
332
		}
333
		return null;
334
	}
335

    
336

    
337

    
338

    
339
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
340
		if (isBlank(uninomial)){
341
			return null;
342
		}
343
		
344
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
345
		
346
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
347
		Taxon taxon = taxonMap.get(key);
348
		if (taxon == null){
349
			ZoologicalName name = ZoologicalName.NewInstance(rank);
350
			name.setGenusOrUninomial(uninomial);
351
			if (isNotBlank(infraGenericEpi)){
352
				name.setInfraGenericEpithet(infraGenericEpi);
353
			}
354
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
355
			
356
			taxonMap.put(key, taxon);
357
			handleAuthorAndYear(author, name, taxonId, state);
358
			getTaxonService().save(taxon);
359
		}
360
		
361
		return taxon;
362
	}
363

    
364

    
365
	//fast and dirty is enough here
366
	private Classification classification;
367
	
368
	private Classification getClassification(GlobisImportState state) {
369
		if (this.classification == null){
370
			String name = state.getConfig().getClassificationName();
371
			Reference<?> reference = state.getTransactionalSourceReference();
372
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
373
			classification.setUuid(state.getConfig().getClassificationUuid());
374
			getClassificationService().save(classification);
375
		}
376
		return this.classification;
377
		
378
	}
379

    
380
	/* (non-Javadoc)
381
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
382
	 */
383
	public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
384
			throws SQLException {
385
		String speciesEpi = rs.getString("dtSpcSpcakt");
386
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
387
		String genusEpi = rs.getString("dtSpcGenusakt");
388
		String author = rs.getString("dtSpcAutor");
389
		
390
		
391
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
392
		zooName.setSpecificEpithet(speciesEpi);
393
		if (StringUtils.isNotBlank(subGenusEpi)){
394
			zooName.setInfraGenericEpithet(subGenusEpi);
395
		}
396
		zooName.setGenusOrUninomial(genusEpi);
397
		handleAuthorAndYear(author, zooName, taxonId, state);
398
		
399
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
400
		
401
		return taxon;
402
	}
403

    
404

    
405

    
406

    
407
	@Override
408
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
409
		String nameSpace;
410
		Class cdmClass;
411
		Set<String> idSet;
412
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
413
		try{
414
			Set<String> taxonIdSet = new HashSet<String>();
415
			
416
			while (rs.next()){
417
//				handleForeignKey(rs, taxonIdSet, "taxonId");
418
			}
419
			
420
			//taxon map
421
			nameSpace = TAXON_NAMESPACE;
422
			cdmClass = Taxon.class;
423
			idSet = taxonIdSet;
424
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
425
			result.put(nameSpace, objectMap);
426

    
427
			
428
		} catch (SQLException e) {
429
			throw new RuntimeException(e);
430
		}
431
		return result;
432
	}
433
	
434
	/* (non-Javadoc)
435
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
436
	 */
437
	@Override
438
	protected boolean doCheck(GlobisImportState state){
439
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
440
		return validator.validate(state);
441
	}
442
	
443
	
444
	/* (non-Javadoc)
445
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
446
	 */
447
	protected boolean isIgnore(GlobisImportState state){
448
		return ! state.getConfig().isDoCurrentTaxa();
449
	}
450

    
451

    
452

    
453

    
454

    
455
}
(3-3/10)