Project

General

Profile

Download (14.9 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.io.common.IOValidator;
25
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.description.Distribution;
30
import eu.etaxonomy.cdm.model.description.PresenceTerm;
31
import eu.etaxonomy.cdm.model.description.TaxonDescription;
32
import eu.etaxonomy.cdm.model.location.NamedArea;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.name.ZoologicalName;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.taxon.Classification;
37
import eu.etaxonomy.cdm.model.taxon.Taxon;
38
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
39
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
40

    
41

    
42
/**
43
 * @author a.mueller
44
 * @created 20.02.2010
45
 * @version 1.0
46
 */
47
@Component
48
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
49
	/** Logger of this import step. */
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** Label of the imported records; used in log messages and handed to the base class. */
	private static final String pluralString = "current taxa";

	/** Name of the source table this import reads from. */
	private static final String dbTableName = "current_species";

	/** Target class handed to the base class constructor. */
	private static final Class cdmTargetClass = Taxon.class;  //not needed

	/** Number of handled records between two progress log messages. */
	private int modCount = 10000;

	/**
	 * Creates the import step and passes the record label, the source table name
	 * and the target class on to the base class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
59

    
60

    
61
	
62
	
63
	/* (non-Javadoc)
64
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
65
	 */
66
	@Override
67
	protected String getIdQuery() {
68
		String strRecordQuery = 
69
			" SELECT IDcurrentspec " + 
70
			" FROM " + dbTableName; 
71
		return strRecordQuery;	
72
	}
73

    
74

    
75

    
76

    
77
	/* (non-Javadoc)
78
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
79
	 */
80
	@Override
81
	protected String getRecordQuery(GlobisImportConfigurator config) {
82
		String strRecordQuery = 
83
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
84
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
85
			" FROM " + getTableName() + " cs " +
86
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
87
		return strRecordQuery;
88
	}
89
	
90

    
91

    
92
	/* (non-Javadoc)
93
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
94
	 */
95
	@Override
96
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
97
		boolean success = true;
98
		
99
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
100
		
101
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
102
//		Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
103
		
104
		ResultSet rs = partitioner.getResultSet();
105

    
106
		Classification classification = getClassification(state);
107
		
108
		try {
109
			
110
			int i = 0;
111

    
112
			//for each reference
113
            while (rs.next()){
114
                
115
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
116
				
117
        		Integer taxonId = rs.getInt("IDcurrentspec");
118
        		
119
        		//String dtSpcJahr -> ignore !
120
        		//empty: fiSpcLiteratur
121
        		
122
        		//TODO
123
        		//fiSpcspcgrptax
124
        		
125
        	
126
        		
127
				try {
128
					
129
					//source ref
130
					Reference<?> sourceRef = state.getTransactionalSourceReference();
131
					Taxon nextHigherTaxon = null;
132
					
133
					boolean hasNewParent = false; //true if any parent is new
134
					
135
					//species
136
					Taxon species = createObject(rs, state, taxonId);
137
					
138
					
139
					String familyStr = rs.getString("dtSpcFamakt");
140
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
141
					String tribeStr = rs.getString("dtSpcTribakt");
142
					
143
					//family
144
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
145
					
146
					//subfamily
147
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
148
					Taxon subFamilyParent = getParent(subFamily, classification);
149
					if (subFamilyParent != null){
150
						if (! compareTaxa(family, subFamilyParent)){
151
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
152
						}
153
					}else{
154
						classification.addParentChild(family, subFamily, sourceRef, null);
155
					}
156
					nextHigherTaxon = subFamily;
157
					
158
					//tribe
159
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
160
					if (tribe != null){
161
						Taxon tribeParent = getParent(tribe, classification);
162
						if (tribeParent != null){
163
							if (! compareTaxa(subFamily, tribeParent)){
164
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
165
							}
166
						}else{
167
							classification.addParentChild(subFamily, tribe, sourceRef, null);
168
						}
169
						nextHigherTaxon = tribe;
170
					}					
171

    
172
					
173
					//genus
174
					String genusStr = rs.getString("dtSpcGenusakt");
175
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
176
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
177
					Taxon genusParent = getParent(genus, classification);
178
					
179
					if (genusParent != null){
180
						if (! compareTaxa(genusParent, nextHigherTaxon)){
181
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
182
						}
183
					}else{
184
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
185
					}
186
					nextHigherTaxon = genus;
187
					
188
					//subgenus
189
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
190
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
191
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
192
					if (hasSubgenus){
193
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
194
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
195
						nextHigherTaxon = subGenus;
196
					}
197
					
198
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
199
					
200
					handleCountries(state, rs, species);
201
					
202
					handleCommonNames(state, rs, species);
203
					
204
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
205
					
206
					objectsToSave.add(species); 
207
					
208

    
209
				} catch (Exception e) {
210
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
211
//					e.printStackTrace();
212
				} 
213
                
214
            }
215
           
216
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
217

    
218
			logger.warn(pluralString + " to save: " + objectsToSave.size());
219
			getTaxonService().save(objectsToSave);	
220
			
221
			return success;
222
		} catch (SQLException e) {
223
			logger.error("SQLException:" +  e);
224
			return false;
225
		}
226
	}
227

    
228
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
229
		String countriesStr = rs.getString("dtSpcCountries");
230
		if (isBlank(countriesStr)){
231
			return;
232
		}
233
		String[] countriesSplit = countriesStr.split(";");
234
		for (String countryStr : countriesSplit){
235
			if (isBlank(countryStr)){
236
				continue;
237
			}
238
			countryStr = countryStr.trim();
239
			
240
			//TODO use isComplete
241
			boolean isComplete = countryStr.endsWith(".");
242
			if (isComplete){
243
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
244
			}
245
			boolean isDoubtful = countryStr.endsWith("[?]");
246
			if (isDoubtful){
247
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
248
			}
249
			if (countryStr.startsWith("?")){
250
				isDoubtful = true;
251
				countryStr = countryStr.substring(1).trim();
252
			}
253
			
254
			
255
			
256
			countryStr = normalizeCountry(countryStr);
257
			
258
			NamedArea country = getCountry(state, countryStr);
259
			
260
			PresenceTerm status;
261
			if (isDoubtful){
262
				status = PresenceTerm.PRESENT_DOUBTFULLY();
263
			}else{
264
				status = PresenceTerm.PRESENT();
265
			}
266
			
267
			if (country != null){
268
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
269
				Distribution distribution = Distribution.NewInstance(country, status);
270
				desc.addElement(distribution);
271
			}else{
272
				logger.warn("Country string not recognized: " + countryStr);
273
			}
274
		}
275
	}
276

    
277

    
278

    
279
	/**
280
	 * @param countryStr
281
	 * @return
282
	 */
283
	private String normalizeCountry(String countryStr) {
284
		String result = countryStr.trim();
285
		if (result.endsWith(".")){
286
			result = result.substring(0,result.length() - 1);
287
		}
288
		return result; 
289
	}
290
	
291
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
292
		//DON't use, use seperate common name tables instead
293
		
294
//		String commonNamesStr = rs.getString("vernacularnames");
295
//		if (isBlank(commonNamesStr)){
296
//			return;
297
//		}
298
//		String[] commonNamesSplit = commonNamesStr.split(";");
299
//		for (String commonNameStr : commonNamesSplit){
300
//			if (isBlank(commonNameStr)){
301
//				continue;
302
//			}
303
//			Language language = null; //TODO
304
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
305
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
306
//			desc.addElement(commonName);
307
//		}
308
	}
309

    
310

    
311

    
312

    
313
	/**
314
	 * Compares 2 taxa, returns true of both taxa look similar
315
	 * @param genus
316
	 * @param nextHigherTaxon
317
	 * @return
318
	 */
319
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
320
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
321
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
322
		if (!name1.getRank().equals(name2.getRank())){
323
			return false;
324
		}
325
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
326
			return false;
327
		}
328
		return true;
329
	}
330

    
331

    
332

    
333

    
334
	private Taxon getParent(Taxon child, Classification classification) {
335
		for (TaxonNode node :  child.getTaxonNodes()){
336
			if (node.getClassification().equals(classification)){
337
				if (node.getParent() != null){
338
					return node.getParent().getTaxon();	
339
				}else{
340
					return null;
341
				}
342
			}
343
		}
344
		return null;
345
	}
346

    
347

    
348

    
349

    
350
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
351
		if (isBlank(uninomial)){
352
			return null;
353
		}
354
		
355
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
356
		
357
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
358
		Taxon taxon = taxonMap.get(key);
359
		if (taxon == null){
360
			ZoologicalName name = ZoologicalName.NewInstance(rank);
361
			name.setGenusOrUninomial(uninomial);
362
			if (isNotBlank(infraGenericEpi)){
363
				name.setInfraGenericEpithet(infraGenericEpi);
364
			}
365
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
366
			
367
			taxonMap.put(key, taxon);
368
			handleAuthorAndYear(author, name, taxonId);
369
			getTaxonService().save(taxon);
370
		}
371
		
372
		return taxon;
373
	}
374

    
375

    
376
	//fast and dirty is enough here
377
	private Classification classification;
378
	
379
	private Classification getClassification(GlobisImportState state) {
380
		if (this.classification == null){
381
			String name = state.getConfig().getClassificationName();
382
			Reference<?> reference = state.getTransactionalSourceReference();
383
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
384
			classification.setUuid(state.getConfig().getClassificationUuid());
385
			getClassificationService().save(classification);
386
		}
387
		return this.classification;
388
		
389
	}
390

    
391
	/* (non-Javadoc)
392
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
393
	 */
394
	public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
395
			throws SQLException {
396
		String speciesEpi = rs.getString("dtSpcSpcakt");
397
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
398
		String genusEpi = rs.getString("dtSpcGenusakt");
399
		String author = rs.getString("dtSpcAutor");
400
		
401
		
402
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
403
		zooName.setSpecificEpithet(speciesEpi);
404
		if (StringUtils.isNotBlank(subGenusEpi)){
405
			zooName.setInfraGenericEpithet(subGenusEpi);
406
		}
407
		zooName.setGenusOrUninomial(genusEpi);
408
		handleAuthorAndYear(author, zooName, taxonId);
409
		
410
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
411
		
412
		return taxon;
413
	}
414

    
415

    
416

    
417

    
418

    
419
	/* (non-Javadoc)
420
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
421
	 */
422
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
423
		String nameSpace;
424
		Class cdmClass;
425
		Set<String> idSet;
426
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
427
		try{
428
			Set<String> taxonIdSet = new HashSet<String>();
429
			
430
			while (rs.next()){
431
//				handleForeignKey(rs, taxonIdSet, "taxonId");
432
			}
433
			
434
			//taxon map
435
			nameSpace = TAXON_NAMESPACE;
436
			cdmClass = Taxon.class;
437
			idSet = taxonIdSet;
438
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
439
			result.put(nameSpace, objectMap);
440

    
441
			
442
		} catch (SQLException e) {
443
			throw new RuntimeException(e);
444
		}
445
		return result;
446
	}
447
	
448
	/* (non-Javadoc)
449
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
450
	 */
451
	@Override
452
	protected boolean doCheck(GlobisImportState state){
453
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
454
		return validator.validate(state);
455
	}
456
	
457
	
458
	/* (non-Javadoc)
459
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
460
	 */
461
	protected boolean isIgnore(GlobisImportState state){
462
		return ! state.getConfig().isDoCurrentTaxa();
463
	}
464

    
465

    
466

    
467

    
468

    
469
}
(2-2/9)