Project

General

Profile

Download (15.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import com.yourkit.util.Strings;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraCollectionImport;
27
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraSpecimenImport;
28
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
32
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35
import eu.etaxonomy.cdm.model.common.Language;
36
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37
import eu.etaxonomy.cdm.model.description.Distribution;
38
import eu.etaxonomy.cdm.model.description.PresenceTerm;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.location.NamedArea;
41
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.name.ZoologicalName;
44
import eu.etaxonomy.cdm.model.occurrence.Collection;
45
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.taxon.Classification;
48
import eu.etaxonomy.cdm.model.taxon.Taxon;
49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
52
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
53
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
54

    
55

    
56
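/**
 * Import of the Globis table <code>current_species</code>.
 * Each record becomes a species {@link Taxon} that is placed in a
 * {@link Classification} below the higher taxa given in the record
 * (family, subfamily, tribe, genus and subgenus). Countries and
 * vernacular names of the record are added to the taxon description.
 *
 * @author a.mueller
 * @created 20.02.2010
 * @version 1.0
 */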
61
@Component
62
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
63
	/** Logger of this import class. */
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** Label of the imported records, used in log messages and handed to the base class. */
	private static final String pluralString = "current taxa";

	/** Name of the source database table this import reads from. */
	private static final String dbTableName = "current_species";

	/** CDM class handed to the base class constructor. */
	private static final Class<Taxon> cdmTargetClass = Taxon.class;  //not needed

	/** A progress message is logged each time this number of records has been handled. */
	private final int modCount = 10000;

	/**
	 * Creates the import and passes the record label, the source table name
	 * and the CDM target class to the base class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
73

    
74

    
75
	
76
	
77
	/* (non-Javadoc)
78
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
79
	 */
80
	@Override
81
	protected String getIdQuery() {
82
		String strRecordQuery = 
83
			" SELECT IDcurrentspec " + 
84
			" FROM " + dbTableName; 
85
		return strRecordQuery;	
86
	}
87

    
88

    
89

    
90

    
91
	/* (non-Javadoc)
92
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
93
	 */
94
	@Override
95
	protected String getRecordQuery(GlobisImportConfigurator config) {
96
		String strRecordQuery = 
97
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
98
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
99
			" FROM " + getTableName() + " cs " +
100
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
101
		return strRecordQuery;
102
	}
103
	
104

    
105

    
106
	/* (non-Javadoc)
107
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
108
	 */
109
	@Override
110
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
111
		boolean success = true;
112
		
113
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
114
		
115
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
116
//		Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
117
		
118
		ResultSet rs = partitioner.getResultSet();
119

    
120
		Classification classification = getClassification(state);
121
		
122
		try {
123
			
124
			int i = 0;
125

    
126
			//for each reference
127
            while (rs.next()){
128
                
129
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
130
				
131
        		Integer taxonId = rs.getInt("IDcurrentspec");
132
        		
133
        		
134
        		//String dtSpcJahr -> ignore !
135
        		//empty: fiSpcLiteratur
136
        		
137
        		//TODO
138
        		//fiSpcspcgrptax
139
        		
140
        	
141
        		
142
				try {
143
					
144
					//source ref
145
					Reference<?> sourceRef = state.getTransactionalSourceReference();
146
					Taxon nextHigherTaxon = null;
147
					
148
					boolean hasNewParent = false; //true if any parent is new
149
					
150
					//species
151
					Taxon species = createObject(rs, state);
152
					
153
					
154
					String familyStr = rs.getString("dtSpcFamakt");
155
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
156
					String tribeStr = rs.getString("dtSpcTribakt");
157
					
158
					//family
159
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
160
					
161
					//subfamily
162
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
163
					Taxon subFamilyParent = getParent(subFamily, classification);
164
					if (subFamilyParent != null){
165
						if (! compareTaxa(family, subFamilyParent)){
166
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
167
						}
168
					}else{
169
						classification.addParentChild(family, subFamily, sourceRef, null);
170
					}
171
					nextHigherTaxon = subFamily;
172
					
173
					//tribe
174
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
175
					if (tribe != null){
176
						Taxon tribeParent = getParent(tribe, classification);
177
						if (tribeParent != null){
178
							if (! compareTaxa(subFamily, tribeParent)){
179
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
180
							}
181
						}else{
182
							classification.addParentChild(subFamily, tribe, sourceRef, null);
183
						}
184
						nextHigherTaxon = tribe;
185
					}					
186

    
187
					
188
					//genus
189
					String genusStr = rs.getString("dtSpcGenusakt");
190
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
191
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
192
					Taxon genusParent = getParent(genus, classification);
193
					
194
					if (genusParent != null){
195
						if (! compareTaxa(genusParent, nextHigherTaxon)){
196
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
197
						}
198
					}else{
199
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
200
					}
201
					nextHigherTaxon = genus;
202
					
203
					//subgenus
204
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
205
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
206
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
207
					if (hasSubgenus){
208
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
209
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
210
						nextHigherTaxon = subGenus;
211
					}
212
					
213
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
214
					
215
					handleCountries(state, rs, species);
216
					
217
					handleCommonNames(state, rs, species);
218
					
219
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
220
					
221
					objectsToSave.add(species); 
222
					
223

    
224
				} catch (Exception e) {
225
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
226
//					e.printStackTrace();
227
				} 
228
                
229
            }
230
           
231
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
232

    
233
			logger.warn(pluralString + " to save: " + objectsToSave.size());
234
			getTaxonService().save(objectsToSave);	
235
			
236
			return success;
237
		} catch (SQLException e) {
238
			logger.error("SQLException:" +  e);
239
			return false;
240
		}
241
	}
242

    
243
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
244
		String countriesStr = rs.getString("dtSpcCountries");
245
		if (isBlank(countriesStr)){
246
			return;
247
		}
248
		String[] countriesSplit = countriesStr.split(";");
249
		for (String countryStr : countriesSplit){
250
			if (isBlank(countryStr)){
251
				continue;
252
			}
253
			countryStr = countryStr.trim();
254
			
255
			//TODO use isComplete
256
			boolean isComplete = countryStr.endsWith(".");
257
			if (isComplete){
258
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
259
			}
260
			boolean isDoubtful = countryStr.endsWith("[?]");
261
			if (isDoubtful){
262
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
263
			}
264
			if (countryStr.startsWith("?")){
265
				isDoubtful = true;
266
				countryStr = countryStr.substring(1).trim();
267
			}
268
			
269
			
270
			
271
			countryStr = normalizeCountry(countryStr);
272
			
273
			WaterbodyOrCountry country = getCountry(state, countryStr);
274
			
275
			PresenceTerm status;
276
			if (isDoubtful){
277
				status = PresenceTerm.PRESENT_DOUBTFULLY();
278
			}else{
279
				status = PresenceTerm.PRESENT();
280
			}
281
			
282
			if (country != null){
283
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
284
				Distribution distribution = Distribution.NewInstance(country, status);
285
				desc.addElement(distribution);
286
			}else{
287
				logger.warn("Country string not recognized: " + countryStr);
288
			}
289
		}
290
	}
291

    
292

    
293

    
294
	/**
295
	 * @param countryStr
296
	 * @return
297
	 */
298
	private String normalizeCountry(String countryStr) {
299
		String result = countryStr.trim();
300
		if (result.endsWith(".")){
301
			result = result.substring(0,result.length() - 1);
302
		}
303
		return result; 
304
	}
305
	
306
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
307
		String commonNamesStr = rs.getString("vernacularnames");
308
		if (isBlank(commonNamesStr)){
309
			return;
310
		}
311
		String[] commonNamesSplit = commonNamesStr.split(";");
312
		for (String commonNameStr : commonNamesSplit){
313
			if (isBlank(commonNameStr)){
314
				continue;
315
			}
316
			Language language = null; //TODO
317
			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
318
			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
319
			desc.addElement(commonName);
320
		}
321
	}
322

    
323

    
324

    
325

    
326
	/**
327
	 * Compares 2 taxa, returns true of both taxa look similar
328
	 * @param genus
329
	 * @param nextHigherTaxon
330
	 * @return
331
	 */
332
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
333
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
334
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
335
		if (!name1.getRank().equals(name2.getRank())){
336
			return false;
337
		}
338
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
339
			return false;
340
		}
341
		return true;
342
	}
343

    
344

    
345

    
346

    
347
	private Taxon getParent(Taxon child, Classification classification) {
348
		for (TaxonNode node :  child.getTaxonNodes()){
349
			if (node.getClassification().equals(classification)){
350
				if (node.getParent() != null){
351
					return node.getParent().getTaxon();	
352
				}else{
353
					return null;
354
				}
355
			}
356
		}
357
		return null;
358
	}
359

    
360

    
361

    
362

    
363
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
364
		if (isBlank(uninomial)){
365
			return null;
366
		}
367
		
368
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
369
		
370
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
371
		Taxon taxon = taxonMap.get(key);
372
		if (taxon == null){
373
			ZoologicalName name = ZoologicalName.NewInstance(rank);
374
			name.setGenusOrUninomial(uninomial);
375
			if (isNotBlank(infraGenericEpi)){
376
				name.setInfraGenericEpithet(infraGenericEpi);
377
			}
378
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
379
			
380
			taxonMap.put(key, taxon);
381
			handleAuthorAndYear(author, name);
382
			getTaxonService().save(taxon);
383
		}
384
		
385
		return taxon;
386
	}
387

    
388

    
389
	//fast and dirty is enough here
390
	private Classification classification;
391
	
392
	private Classification getClassification(GlobisImportState state) {
393
		if (this.classification == null){
394
			String name = state.getConfig().getClassificationName();
395
			Reference<?> reference = state.getTransactionalSourceReference();
396
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
397
			classification.setUuid(state.getConfig().getClassificationUuid());
398
			getClassificationService().save(classification);
399
		}
400
		return this.classification;
401
		
402
	}
403

    
404
	/* (non-Javadoc)
405
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
406
	 */
407
	public Taxon createObject(ResultSet rs, GlobisImportState state)
408
			throws SQLException {
409
		String speciesEpi = rs.getString("dtSpcSpcakt");
410
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
411
		String genusEpi = rs.getString("dtSpcGenusakt");
412
		String author = rs.getString("dtSpcAutor");
413
		
414
		
415
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
416
		zooName.setSpecificEpithet(speciesEpi);
417
		if (StringUtils.isNotBlank(subGenusEpi)){
418
			zooName.setInfraGenericEpithet(subGenusEpi);
419
		}
420
		zooName.setGenusOrUninomial(genusEpi);
421
		handleAuthorAndYear(author, zooName);
422
		
423
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
424
		
425
		return taxon;
426
	}
427

    
428

    
429

    
430

    
431

    
432
	/* (non-Javadoc)
433
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
434
	 */
435
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
436
		String nameSpace;
437
		Class cdmClass;
438
		Set<String> idSet;
439
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
440
		try{
441
			Set<String> taxonIdSet = new HashSet<String>();
442
			
443
			while (rs.next()){
444
//				handleForeignKey(rs, taxonIdSet, "taxonId");
445
			}
446
			
447
			//taxon map
448
			nameSpace = TAXON_NAMESPACE;
449
			cdmClass = Taxon.class;
450
			idSet = taxonIdSet;
451
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
452
			result.put(nameSpace, objectMap);
453

    
454
			
455
		} catch (SQLException e) {
456
			throw new RuntimeException(e);
457
		}
458
		return result;
459
	}
460
	
461
	/* (non-Javadoc)
462
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
463
	 */
464
	@Override
465
	protected boolean doCheck(GlobisImportState state){
466
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
467
		return validator.validate(state);
468
	}
469
	
470
	
471
	/* (non-Javadoc)
472
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
473
	 */
474
	protected boolean isIgnore(GlobisImportState state){
475
		return ! state.getConfig().isDoCurrentTaxa();
476
	}
477

    
478

    
479

    
480

    
481

    
482
}
(1-1/9)