Project

General

Profile

Download (15.6 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import com.yourkit.util.Strings;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraCollectionImport;
27
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraSpecimenImport;
28
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
32
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35
import eu.etaxonomy.cdm.model.common.Language;
36
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37
import eu.etaxonomy.cdm.model.description.Distribution;
38
import eu.etaxonomy.cdm.model.description.PresenceTerm;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.location.NamedArea;
41
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.name.ZoologicalName;
44
import eu.etaxonomy.cdm.model.occurrence.Collection;
45
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.taxon.Classification;
48
import eu.etaxonomy.cdm.model.taxon.Taxon;
49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
52
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
53
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
54

    
55

    
56
/**
57
 * @author a.mueller
58
 * @created 20.02.2010
59
 * @version 1.0
60
 */
61
@Component
62
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
63
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** Progress is logged once for every <code>modCount</code> handled records. */
	private final int modCount = 10000;
	/** Human readable name of the imported objects, used in log messages. */
	private static final String pluralString = "current taxa";
	/** Name of the source table the records are read from. */
	private static final String dbTableName = "current_species";
	/** Target class handed to the base import; typed instead of a raw <code>Class</code>. */
	private static final Class<Taxon> cdmTargetClass = Taxon.class;  //not needed

	/**
	 * Creates the import and passes the plural string, the source table name and
	 * the target class to the base class constructor.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
73

    
74

    
75
	
76
	
77
	/* (non-Javadoc)
78
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
79
	 */
80
	@Override
81
	protected String getIdQuery() {
82
		String strRecordQuery = 
83
			" SELECT IDcurrentspec " + 
84
			" FROM " + dbTableName; 
85
		return strRecordQuery;	
86
	}
87

    
88

    
89

    
90

    
91
	/* (non-Javadoc)
92
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
93
	 */
94
	@Override
95
	protected String getRecordQuery(GlobisImportConfigurator config) {
96
		String strRecordQuery = 
97
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
98
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
99
			" FROM " + getTableName() + " cs " +
100
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
101
		return strRecordQuery;
102
	}
103
	
104

    
105

    
106
	/* (non-Javadoc)
107
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
108
	 */
109
	@Override
110
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
111
		boolean success = true;
112
		
113
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
114
		
115
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
116
//		Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
117
		
118
		ResultSet rs = partitioner.getResultSet();
119

    
120
		Classification classification = getClassification(state);
121
		
122
		try {
123
			
124
			int i = 0;
125

    
126
			//for each reference
127
            while (rs.next()){
128
                
129
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
130
				
131
        		Integer taxonId = rs.getInt("IDcurrentspec");
132
        		
133
        		
134
        		//String dtSpcJahr -> ignore !
135
        		//empty: fiSpcLiteratur
136
        		
137
        		//TODO
138
        		//fiSpcspcgrptax
139
        		
140
        	
141
        		
142
				try {
143
					
144
					//source ref
145
					Reference<?> sourceRef = state.getTransactionalSourceReference();
146
					Taxon nextHigherTaxon = null;
147
					
148
					boolean hasNewParent = false; //true if any parent is new
149
					
150
					//species
151
					Taxon species = createObject(rs, state);
152
					
153
					
154
					String familyStr = rs.getString("dtSpcFamakt");
155
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
156
					String tribeStr = rs.getString("dtSpcTribakt");
157
					
158
					//family
159
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
160
					
161
					//subfamily
162
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
163
					Taxon subFamilyParent = getParent(subFamily, classification);
164
					if (subFamilyParent != null){
165
						if (! compareTaxa(family, subFamilyParent)){
166
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
167
						}
168
					}else{
169
						classification.addParentChild(family, subFamily, sourceRef, null);
170
					}
171
					nextHigherTaxon = subFamily;
172
					
173
					//tribe
174
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
175
					if (tribe != null){
176
						Taxon tribeParent = getParent(tribe, classification);
177
						if (tribeParent != null){
178
							if (! compareTaxa(subFamily, tribeParent)){
179
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
180
							}
181
						}else{
182
							classification.addParentChild(subFamily, tribe, sourceRef, null);
183
						}
184
						nextHigherTaxon = tribe;
185
					}					
186

    
187
					
188
					//genus
189
					String genusStr = rs.getString("dtSpcGenusakt");
190
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
191
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
192
					Taxon genusParent = getParent(genus, classification);
193
					
194
					if (genusParent != null){
195
						if (! compareTaxa(genusParent, nextHigherTaxon)){
196
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
197
						}
198
					}else{
199
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
200
					}
201
					nextHigherTaxon = genus;
202
					
203
					//subgenus
204
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
205
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
206
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
207
					if (hasSubgenus){
208
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
209
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
210
						nextHigherTaxon = subGenus;
211
					}
212
					
213
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
214
					
215
					handleCountries(state, rs, species);
216
					
217
					handleCommonNames(state, rs, species);
218
					
219
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
220
					
221
					objectsToSave.add(species); 
222
					
223

    
224
				} catch (Exception e) {
225
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
226
//					e.printStackTrace();
227
				} 
228
                
229
            }
230
           
231
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
232

    
233
			logger.warn(pluralString + " to save: " + objectsToSave.size());
234
			getTaxonService().save(objectsToSave);	
235
			
236
			return success;
237
		} catch (SQLException e) {
238
			logger.error("SQLException:" +  e);
239
			return false;
240
		}
241
	}
242

    
243
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
244
		String countriesStr = rs.getString("dtSpcCountries");
245
		if (isBlank(countriesStr)){
246
			return;
247
		}
248
		String[] countriesSplit = countriesStr.split(";");
249
		for (String countryStr : countriesSplit){
250
			if (isBlank(countryStr)){
251
				continue;
252
			}
253
			countryStr = countryStr.trim();
254
			
255
			//TODO use isComplete
256
			boolean isComplete = countryStr.endsWith(".");
257
			if (isComplete){
258
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
259
			}
260
			boolean isDoubtful = countryStr.endsWith("[?]");
261
			if (isDoubtful){
262
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
263
			}
264
			if (countryStr.startsWith("?")){
265
				isDoubtful = true;
266
				countryStr = countryStr.substring(1).trim();
267
			}
268
			
269
			
270
			
271
			countryStr = normalizeCountry(countryStr);
272
			
273
			WaterbodyOrCountry country = getCountry(state, countryStr);
274
			
275
			PresenceTerm status;
276
			if (isDoubtful){
277
				status = PresenceTerm.PRESENT_DOUBTFULLY();
278
			}else{
279
				status = PresenceTerm.PRESENT();
280
			}
281
			
282
			if (country != null){
283
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
284
				Distribution distribution = Distribution.NewInstance(country, status);
285
				desc.addElement(distribution);
286
			}else{
287
				logger.warn("Country string not recognized: " + countryStr);
288
			}
289
		}
290
	}
291

    
292

    
293

    
294
	/**
295
	 * @param countryStr
296
	 * @return
297
	 */
298
	private String normalizeCountry(String countryStr) {
299
		String result = countryStr.trim();
300
		if (result.endsWith(".")){
301
			result = result.substring(0,result.length() - 1);
302
		}
303
		return result; 
304
	}
305
	
306
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
307
		//DON't use, use seperate common name tables instead
308
		
309
//		String commonNamesStr = rs.getString("vernacularnames");
310
//		if (isBlank(commonNamesStr)){
311
//			return;
312
//		}
313
//		String[] commonNamesSplit = commonNamesStr.split(";");
314
//		for (String commonNameStr : commonNamesSplit){
315
//			if (isBlank(commonNameStr)){
316
//				continue;
317
//			}
318
//			Language language = null; //TODO
319
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
320
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
321
//			desc.addElement(commonName);
322
//		}
323
	}
324

    
325

    
326

    
327

    
328
	/**
329
	 * Compares 2 taxa, returns true of both taxa look similar
330
	 * @param genus
331
	 * @param nextHigherTaxon
332
	 * @return
333
	 */
334
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
335
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
336
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
337
		if (!name1.getRank().equals(name2.getRank())){
338
			return false;
339
		}
340
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
341
			return false;
342
		}
343
		return true;
344
	}
345

    
346

    
347

    
348

    
349
	private Taxon getParent(Taxon child, Classification classification) {
350
		for (TaxonNode node :  child.getTaxonNodes()){
351
			if (node.getClassification().equals(classification)){
352
				if (node.getParent() != null){
353
					return node.getParent().getTaxon();	
354
				}else{
355
					return null;
356
				}
357
			}
358
		}
359
		return null;
360
	}
361

    
362

    
363

    
364

    
365
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
366
		if (isBlank(uninomial)){
367
			return null;
368
		}
369
		
370
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
371
		
372
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
373
		Taxon taxon = taxonMap.get(key);
374
		if (taxon == null){
375
			ZoologicalName name = ZoologicalName.NewInstance(rank);
376
			name.setGenusOrUninomial(uninomial);
377
			if (isNotBlank(infraGenericEpi)){
378
				name.setInfraGenericEpithet(infraGenericEpi);
379
			}
380
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
381
			
382
			taxonMap.put(key, taxon);
383
			handleAuthorAndYear(author, name);
384
			getTaxonService().save(taxon);
385
		}
386
		
387
		return taxon;
388
	}
389

    
390

    
391
	//fast and dirty is enough here
392
	private Classification classification;
393
	
394
	private Classification getClassification(GlobisImportState state) {
395
		if (this.classification == null){
396
			String name = state.getConfig().getClassificationName();
397
			Reference<?> reference = state.getTransactionalSourceReference();
398
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
399
			classification.setUuid(state.getConfig().getClassificationUuid());
400
			getClassificationService().save(classification);
401
		}
402
		return this.classification;
403
		
404
	}
405

    
406
	/* (non-Javadoc)
407
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
408
	 */
409
	public Taxon createObject(ResultSet rs, GlobisImportState state)
410
			throws SQLException {
411
		String speciesEpi = rs.getString("dtSpcSpcakt");
412
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
413
		String genusEpi = rs.getString("dtSpcGenusakt");
414
		String author = rs.getString("dtSpcAutor");
415
		
416
		
417
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
418
		zooName.setSpecificEpithet(speciesEpi);
419
		if (StringUtils.isNotBlank(subGenusEpi)){
420
			zooName.setInfraGenericEpithet(subGenusEpi);
421
		}
422
		zooName.setGenusOrUninomial(genusEpi);
423
		handleAuthorAndYear(author, zooName);
424
		
425
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
426
		
427
		return taxon;
428
	}
429

    
430

    
431

    
432

    
433

    
434
	/* (non-Javadoc)
435
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
436
	 */
437
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
438
		String nameSpace;
439
		Class cdmClass;
440
		Set<String> idSet;
441
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
442
		try{
443
			Set<String> taxonIdSet = new HashSet<String>();
444
			
445
			while (rs.next()){
446
//				handleForeignKey(rs, taxonIdSet, "taxonId");
447
			}
448
			
449
			//taxon map
450
			nameSpace = TAXON_NAMESPACE;
451
			cdmClass = Taxon.class;
452
			idSet = taxonIdSet;
453
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
454
			result.put(nameSpace, objectMap);
455

    
456
			
457
		} catch (SQLException e) {
458
			throw new RuntimeException(e);
459
		}
460
		return result;
461
	}
462
	
463
	/* (non-Javadoc)
464
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
465
	 */
466
	@Override
467
	protected boolean doCheck(GlobisImportState state){
468
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
469
		return validator.validate(state);
470
	}
471
	
472
	
473
	/* (non-Javadoc)
474
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
475
	 */
476
	protected boolean isIgnore(GlobisImportState state){
477
		return ! state.getConfig().isDoCurrentTaxa();
478
	}
479

    
480

    
481

    
482

    
483

    
484
}
(1-1/9)