Project

General

Profile

Download (14.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.io.common.IOValidator;
25
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
26
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.description.Distribution;
30
import eu.etaxonomy.cdm.model.description.PresenceTerm;
31
import eu.etaxonomy.cdm.model.description.TaxonDescription;
32
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.name.ZoologicalName;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.taxon.Classification;
37
import eu.etaxonomy.cdm.model.taxon.Taxon;
38
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
39
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
40

    
41

    
42
/**
43
 * @author a.mueller
44
 * @created 20.02.2010
45
 * @version 1.0
46
 */
47
@Component
48
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
49
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);
	
	/** Number of handled records after which a progress message is logged. */
	private final int modCount = 10000;
	/** Plural label of the imported objects, used in log messages. */
	private static final String pluralString = "current taxa";
	/** Name of the source table this import reads from. */
	private static final String dbTableName = "current_species";
	/** Target class of this import; it is only handed through to the base class constructor. */
	private static final Class<Taxon> cdmTargetClass = Taxon.class;  //not needed
	
	/**
	 * Creates the import and passes the log label, the source table name and
	 * the target class to the base class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
59

    
60

    
61
	
62
	
63
	/* (non-Javadoc)
64
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
65
	 */
66
	@Override
67
	protected String getIdQuery() {
68
		String strRecordQuery = 
69
			" SELECT IDcurrentspec " + 
70
			" FROM " + dbTableName; 
71
		return strRecordQuery;	
72
	}
73

    
74

    
75

    
76

    
77
	/* (non-Javadoc)
78
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
79
	 */
80
	@Override
81
	protected String getRecordQuery(GlobisImportConfigurator config) {
82
		String strRecordQuery = 
83
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
84
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
85
			" FROM " + getTableName() + " cs " +
86
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
87
		return strRecordQuery;
88
	}
89
	
90

    
91

    
92
	/* (non-Javadoc)
93
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
94
	 */
95
	@Override
96
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
97
		boolean success = true;
98
		
99
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
100
		
101
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
102
//		Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
103
		
104
		ResultSet rs = partitioner.getResultSet();
105

    
106
		Classification classification = getClassification(state);
107
		
108
		try {
109
			
110
			int i = 0;
111

    
112
			//for each reference
113
            while (rs.next()){
114
                
115
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
116
				
117
        		Integer taxonId = rs.getInt("IDcurrentspec");
118
        		
119
        		
120
        		//String dtSpcJahr -> ignore !
121
        		//empty: fiSpcLiteratur
122
        		
123
        		//TODO
124
        		//fiSpcspcgrptax
125
        		
126
        	
127
        		
128
				try {
129
					
130
					//source ref
131
					Reference<?> sourceRef = state.getTransactionalSourceReference();
132
					Taxon nextHigherTaxon = null;
133
					
134
					boolean hasNewParent = false; //true if any parent is new
135
					
136
					//species
137
					Taxon species = createObject(rs, state);
138
					
139
					
140
					String familyStr = rs.getString("dtSpcFamakt");
141
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
142
					String tribeStr = rs.getString("dtSpcTribakt");
143
					
144
					//family
145
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
146
					
147
					//subfamily
148
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
149
					Taxon subFamilyParent = getParent(subFamily, classification);
150
					if (subFamilyParent != null){
151
						if (! compareTaxa(family, subFamilyParent)){
152
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
153
						}
154
					}else{
155
						classification.addParentChild(family, subFamily, sourceRef, null);
156
					}
157
					nextHigherTaxon = subFamily;
158
					
159
					//tribe
160
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
161
					if (tribe != null){
162
						Taxon tribeParent = getParent(tribe, classification);
163
						if (tribeParent != null){
164
							if (! compareTaxa(subFamily, tribeParent)){
165
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
166
							}
167
						}else{
168
							classification.addParentChild(subFamily, tribe, sourceRef, null);
169
						}
170
						nextHigherTaxon = tribe;
171
					}					
172

    
173
					
174
					//genus
175
					String genusStr = rs.getString("dtSpcGenusakt");
176
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
177
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
178
					Taxon genusParent = getParent(genus, classification);
179
					
180
					if (genusParent != null){
181
						if (! compareTaxa(genusParent, nextHigherTaxon)){
182
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
183
						}
184
					}else{
185
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
186
					}
187
					nextHigherTaxon = genus;
188
					
189
					//subgenus
190
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
191
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
192
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
193
					if (hasSubgenus){
194
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
195
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
196
						nextHigherTaxon = subGenus;
197
					}
198
					
199
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
200
					
201
					handleCountries(state, rs, species);
202
					
203
					handleCommonNames(state, rs, species);
204
					
205
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
206
					
207
					objectsToSave.add(species); 
208
					
209

    
210
				} catch (Exception e) {
211
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
212
//					e.printStackTrace();
213
				} 
214
                
215
            }
216
           
217
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
218

    
219
			logger.warn(pluralString + " to save: " + objectsToSave.size());
220
			getTaxonService().save(objectsToSave);	
221
			
222
			return success;
223
		} catch (SQLException e) {
224
			logger.error("SQLException:" +  e);
225
			return false;
226
		}
227
	}
228

    
229
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
230
		String countriesStr = rs.getString("dtSpcCountries");
231
		if (isBlank(countriesStr)){
232
			return;
233
		}
234
		String[] countriesSplit = countriesStr.split(";");
235
		for (String countryStr : countriesSplit){
236
			if (isBlank(countryStr)){
237
				continue;
238
			}
239
			countryStr = countryStr.trim();
240
			
241
			//TODO use isComplete
242
			boolean isComplete = countryStr.endsWith(".");
243
			if (isComplete){
244
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
245
			}
246
			boolean isDoubtful = countryStr.endsWith("[?]");
247
			if (isDoubtful){
248
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
249
			}
250
			if (countryStr.startsWith("?")){
251
				isDoubtful = true;
252
				countryStr = countryStr.substring(1).trim();
253
			}
254
			
255
			
256
			
257
			countryStr = normalizeCountry(countryStr);
258
			
259
			WaterbodyOrCountry country = getCountry(state, countryStr);
260
			
261
			PresenceTerm status;
262
			if (isDoubtful){
263
				status = PresenceTerm.PRESENT_DOUBTFULLY();
264
			}else{
265
				status = PresenceTerm.PRESENT();
266
			}
267
			
268
			if (country != null){
269
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
270
				Distribution distribution = Distribution.NewInstance(country, status);
271
				desc.addElement(distribution);
272
			}else{
273
				logger.warn("Country string not recognized: " + countryStr);
274
			}
275
		}
276
	}
277

    
278

    
279

    
280
	/**
281
	 * @param countryStr
282
	 * @return
283
	 */
284
	private String normalizeCountry(String countryStr) {
285
		String result = countryStr.trim();
286
		if (result.endsWith(".")){
287
			result = result.substring(0,result.length() - 1);
288
		}
289
		return result; 
290
	}
291
	
292
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
293
		//DON't use, use seperate common name tables instead
294
		
295
//		String commonNamesStr = rs.getString("vernacularnames");
296
//		if (isBlank(commonNamesStr)){
297
//			return;
298
//		}
299
//		String[] commonNamesSplit = commonNamesStr.split(";");
300
//		for (String commonNameStr : commonNamesSplit){
301
//			if (isBlank(commonNameStr)){
302
//				continue;
303
//			}
304
//			Language language = null; //TODO
305
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
306
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
307
//			desc.addElement(commonName);
308
//		}
309
	}
310

    
311

    
312

    
313

    
314
	/**
315
	 * Compares 2 taxa, returns true of both taxa look similar
316
	 * @param genus
317
	 * @param nextHigherTaxon
318
	 * @return
319
	 */
320
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
321
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
322
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
323
		if (!name1.getRank().equals(name2.getRank())){
324
			return false;
325
		}
326
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
327
			return false;
328
		}
329
		return true;
330
	}
331

    
332

    
333

    
334

    
335
	private Taxon getParent(Taxon child, Classification classification) {
336
		for (TaxonNode node :  child.getTaxonNodes()){
337
			if (node.getClassification().equals(classification)){
338
				if (node.getParent() != null){
339
					return node.getParent().getTaxon();	
340
				}else{
341
					return null;
342
				}
343
			}
344
		}
345
		return null;
346
	}
347

    
348

    
349

    
350

    
351
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
352
		if (isBlank(uninomial)){
353
			return null;
354
		}
355
		
356
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
357
		
358
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
359
		Taxon taxon = taxonMap.get(key);
360
		if (taxon == null){
361
			ZoologicalName name = ZoologicalName.NewInstance(rank);
362
			name.setGenusOrUninomial(uninomial);
363
			if (isNotBlank(infraGenericEpi)){
364
				name.setInfraGenericEpithet(infraGenericEpi);
365
			}
366
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
367
			
368
			taxonMap.put(key, taxon);
369
			handleAuthorAndYear(author, name);
370
			getTaxonService().save(taxon);
371
		}
372
		
373
		return taxon;
374
	}
375

    
376

    
377
	//fast and dirty is enough here
378
	private Classification classification;
379
	
380
	private Classification getClassification(GlobisImportState state) {
381
		if (this.classification == null){
382
			String name = state.getConfig().getClassificationName();
383
			Reference<?> reference = state.getTransactionalSourceReference();
384
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
385
			classification.setUuid(state.getConfig().getClassificationUuid());
386
			getClassificationService().save(classification);
387
		}
388
		return this.classification;
389
		
390
	}
391

    
392
	/* (non-Javadoc)
393
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
394
	 */
395
	public Taxon createObject(ResultSet rs, GlobisImportState state)
396
			throws SQLException {
397
		String speciesEpi = rs.getString("dtSpcSpcakt");
398
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
399
		String genusEpi = rs.getString("dtSpcGenusakt");
400
		String author = rs.getString("dtSpcAutor");
401
		
402
		
403
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
404
		zooName.setSpecificEpithet(speciesEpi);
405
		if (StringUtils.isNotBlank(subGenusEpi)){
406
			zooName.setInfraGenericEpithet(subGenusEpi);
407
		}
408
		zooName.setGenusOrUninomial(genusEpi);
409
		handleAuthorAndYear(author, zooName);
410
		
411
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
412
		
413
		return taxon;
414
	}
415

    
416

    
417

    
418

    
419

    
420
	/* (non-Javadoc)
421
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
422
	 */
423
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
424
		String nameSpace;
425
		Class cdmClass;
426
		Set<String> idSet;
427
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
428
		try{
429
			Set<String> taxonIdSet = new HashSet<String>();
430
			
431
			while (rs.next()){
432
//				handleForeignKey(rs, taxonIdSet, "taxonId");
433
			}
434
			
435
			//taxon map
436
			nameSpace = TAXON_NAMESPACE;
437
			cdmClass = Taxon.class;
438
			idSet = taxonIdSet;
439
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
440
			result.put(nameSpace, objectMap);
441

    
442
			
443
		} catch (SQLException e) {
444
			throw new RuntimeException(e);
445
		}
446
		return result;
447
	}
448
	
449
	/* (non-Javadoc)
450
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
451
	 */
452
	@Override
453
	protected boolean doCheck(GlobisImportState state){
454
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
455
		return validator.validate(state);
456
	}
457
	
458
	
459
	/* (non-Javadoc)
460
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
461
	 */
462
	protected boolean isIgnore(GlobisImportState state){
463
		return ! state.getConfig().isDoCurrentTaxa();
464
	}
465

    
466

    
467

    
468

    
469

    
470
}
(1-1/9)