Project

General

Profile

Download (15.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import com.yourkit.util.Strings;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraCollectionImport;
27
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraSpecimenImport;
28
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
29
import eu.etaxonomy.cdm.io.common.IOValidator;
30
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
32
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
33
import eu.etaxonomy.cdm.model.common.CdmBase;
34
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35
import eu.etaxonomy.cdm.model.common.Language;
36
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37
import eu.etaxonomy.cdm.model.description.Distribution;
38
import eu.etaxonomy.cdm.model.description.PresenceTerm;
39
import eu.etaxonomy.cdm.model.description.TaxonDescription;
40
import eu.etaxonomy.cdm.model.location.NamedArea;
41
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.name.ZoologicalName;
44
import eu.etaxonomy.cdm.model.occurrence.Collection;
45
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.taxon.Classification;
48
import eu.etaxonomy.cdm.model.taxon.Taxon;
49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
52
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
53
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
54

    
55

    
56
/**
57
 * @author a.mueller
58
 * @created 20.02.2010
59
 * @version 1.0
60
 */
61
@Component
62
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
63
	/** Logger of this import step. */
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** Label of the imported records, used in log messages. */
	private static final String pluralString = "current taxa";

	/** Name of the source table the id query selects from. */
	private static final String dbTableName = "current_species";

	/** Target class handed to the base class constructor. */
	private static final Class cdmTargetClass = Taxon.class;  //not needed

	/** Number of handled records between two progress log messages. */
	private int modCount = 10000;

	/**
	 * Creates the import step and passes its label, source table name and
	 * target class to the base class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
73

    
74

    
75
	
76
	
77
	/* (non-Javadoc)
78
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
79
	 */
80
	@Override
81
	protected String getIdQuery() {
82
		String strRecordQuery = 
83
			" SELECT IDcurrentspec " + 
84
			" FROM " + dbTableName; 
85
		return strRecordQuery;	
86
	}
87

    
88

    
89

    
90

    
91
	/* (non-Javadoc)
92
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
93
	 */
94
	@Override
95
	protected String getRecordQuery(GlobisImportConfigurator config) {
96
		String strRecordQuery = 
97
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
98
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " + 
99
			" FROM " + getTableName() + " cs " +
100
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
101
		return strRecordQuery;
102
	}
103
	
104

    
105

    
106
	/* (non-Javadoc)
107
	 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
108
	 */
109
	@Override
110
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
111
		boolean success = true;
112
		
113
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
114
		
115
		Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
116
//		Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
117
		
118
		ResultSet rs = partitioner.getResultSet();
119

    
120
		Classification classification = getClassification(state);
121
		
122
		try {
123
			
124
			int i = 0;
125

    
126
			//for each reference
127
            while (rs.next()){
128
                
129
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
130
				
131
        		Integer taxonId = rs.getInt("IDcurrentspec");
132
        		
133
        		
134
        		//String dtSpcJahr -> ignore !
135
        		//empty: fiSpcLiteratur
136
        		
137
        		//TODO
138
        		//fiSpcspcgrptax
139
        		
140
        	
141
        		
142
				try {
143
					
144
					//source ref
145
					Reference<?> sourceRef = state.getTransactionalSourceReference();
146
					Taxon nextHigherTaxon = null;
147
					
148
					boolean hasNewParent = false; //true if any parent is new
149
					
150
					//species
151
					Taxon species = createObject(rs, state);
152
					
153
					
154
					String familyStr = rs.getString("dtSpcFamakt");
155
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
156
					String tribeStr = rs.getString("dtSpcTribakt");
157
					
158
					//family
159
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
160
					
161
					//subfamily
162
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
163
					Taxon subFamilyParent = getParent(subFamily, classification);
164
					if (subFamilyParent != null){
165
						if (! compareTaxa(family, subFamilyParent)){
166
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
167
						}
168
					}else{
169
						classification.addParentChild(family, subFamily, sourceRef, null);
170
					}
171
					nextHigherTaxon = subFamily;
172
					
173
					//tribe
174
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
175
					if (tribe != null){
176
						Taxon tribeParent = getParent(tribe, classification);
177
						if (tribeParent != null){
178
							if (! compareTaxa(subFamily, tribeParent)){
179
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
180
							}
181
						}else{
182
							classification.addParentChild(subFamily, tribe, sourceRef, null);
183
						}
184
						nextHigherTaxon = tribe;
185
					}					
186

    
187
					
188
					//genus
189
					String genusStr = rs.getString("dtSpcGenusakt");
190
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
191
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
192
					Taxon genusParent = getParent(genus, classification);
193
					
194
					if (genusParent != null){
195
						if (! compareTaxa(genusParent, nextHigherTaxon)){
196
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
197
						}
198
					}else{
199
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
200
					}
201
					nextHigherTaxon = genus;
202
					
203
					//subgenus
204
					String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
205
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
206
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
207
					if (hasSubgenus){
208
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
209
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
210
						nextHigherTaxon = subGenus;
211
					}
212
					
213
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
214
					
215
					handleCountries(state, rs, species);
216
					
217
					handleCommonNames(state, rs, species);
218
					
219
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, REFERENCE_NAMESPACE);
220
					
221
					objectsToSave.add(species); 
222
					
223

    
224
				} catch (Exception e) {
225
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
226
//					e.printStackTrace();
227
				} 
228
                
229
            }
230
           
231
//            logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
232

    
233
			logger.warn(pluralString + " to save: " + objectsToSave.size());
234
			getTaxonService().save(objectsToSave);	
235
			
236
			return success;
237
		} catch (SQLException e) {
238
			logger.error("SQLException:" +  e);
239
			return false;
240
		}
241
	}
242

    
243
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
244
		String countriesStr = rs.getString("dtSpcCountries");
245
		if (isBlank(countriesStr)){
246
			return;
247
		}
248
		String[] countriesSplit = countriesStr.split(";");
249
		for (String countryStr : countriesSplit){
250
			if (isBlank(countryStr)){
251
				continue;
252
			}else{
253
				countryStr = normalizeCountry(countryStr);
254
			}
255
			
256
			WaterbodyOrCountry country = WaterbodyOrCountry.getWaterbodyOrCountryByLabel(countryStr);
257
			if (country == null){
258
				try {
259
					country = (WaterbodyOrCountry)state.getTransformer().getNamedAreaByKey(countryStr);
260
				} catch (UndefinedTransformerMethodException e) {
261
					e.printStackTrace();
262
				}
263
			}
264
			
265
			if (country != null){
266
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
267
				Distribution distribution = Distribution.NewInstance(country, PresenceTerm.PRESENT());
268
				desc.addElement(distribution);
269
			}else{
270
				logger.warn("Country string not recognized: " + countryStr);
271
			}
272
		}
273
	}
274

    
275

    
276

    
277

    
278
	/**
279
	 * @param countryStr
280
	 * @return
281
	 */
282
	private String normalizeCountry(String countryStr) {
283
		String result = countryStr.trim();
284
		if (result.endsWith(".")){
285
			result = result.substring(0,result.length() - 1);
286
		}
287
		return result; 
288
	}
289
	
290
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
291
		String commonNamesStr = rs.getString("vernacularnames");
292
		if (isBlank(commonNamesStr)){
293
			return;
294
		}
295
		String[] commonNamesSplit = commonNamesStr.split(";");
296
		for (String commonNameStr : commonNamesSplit){
297
			if (isBlank(commonNameStr)){
298
				continue;
299
			}
300
			Language language = null; //TODO
301
			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
302
			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
303
			desc.addElement(commonName);
304
		}
305
	}
306

    
307

    
308

    
309

    
310
	/**
311
	 * Compares 2 taxa, returns true of both taxa look similar
312
	 * @param genus
313
	 * @param nextHigherTaxon
314
	 * @return
315
	 */
316
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
317
		ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
318
		ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
319
		if (!name1.getRank().equals(name2.getRank())){
320
			return false;
321
		}
322
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
323
			return false;
324
		}
325
		return true;
326
	}
327

    
328

    
329

    
330

    
331
	private Taxon getParent(Taxon child, Classification classification) {
332
		for (TaxonNode node :  child.getTaxonNodes()){
333
			if (node.getClassification().equals(classification)){
334
				if (node.getParent() != null){
335
					return node.getParent().getTaxon();	
336
				}else{
337
					return null;
338
				}
339
			}
340
		}
341
		return null;
342
	}
343

    
344

    
345

    
346

    
347
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
348
		if (isBlank(uninomial)){
349
			return null;
350
		}
351
		
352
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
353
		
354
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
355
		Taxon taxon = taxonMap.get(key);
356
		if (taxon == null){
357
			ZoologicalName name = ZoologicalName.NewInstance(rank);
358
			name.setGenusOrUninomial(uninomial);
359
			if (isNotBlank(infraGenericEpi)){
360
				name.setInfraGenericEpithet(infraGenericEpi);
361
			}
362
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
363
			
364
			taxonMap.put(key, taxon);
365
			handleAuthor(author, name);
366
			getTaxonService().save(taxon);
367
		}
368
		
369
		return taxon;
370
	}
371

    
372

    
373
	//fast and dirty is enough here
374
	private Classification classification;
375
	
376
	private Classification getClassification(GlobisImportState state) {
377
		if (this.classification == null){
378
			String name = state.getConfig().getClassificationName();
379
			Reference<?> reference = state.getTransactionalSourceReference();
380
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
381
			classification.setUuid(state.getConfig().getClassificationUuid());
382
			getClassificationService().save(classification);
383
		}
384
		return this.classification;
385
		
386
	}
387

    
388
	private INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
389
	
390

    
391
	/* (non-Javadoc)
392
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
393
	 */
394
	public Taxon createObject(ResultSet rs, GlobisImportState state)
395
			throws SQLException {
396
		String speciesEpi = rs.getString("dtSpcSpcakt");
397
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
398
		String genusEpi = rs.getString("dtSpcGenusakt");
399
		String author = rs.getString("dtSpcAutor");
400
		
401
		
402
		ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
403
		zooName.setSpecificEpithet(speciesEpi);
404
		if (StringUtils.isNotBlank(subGenusEpi)){
405
			zooName.setInfraGenericEpithet(subGenusEpi);
406
		}
407
		zooName.setGenusOrUninomial(genusEpi);
408
		handleAuthor(author, zooName);
409
		
410
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
411
		
412
		return taxon;
413
	}
414

    
415

    
416

    
417

    
418
	/**
419
	 * @param author
420
	 * @param zooName
421
	 */
422
	private void handleAuthor(String author, ZoologicalName zooName) {
423
		if (isBlank(author)){
424
			return;
425
		}
426
		try {
427
			if(author.matches(".*\\,\\s\\[\\d{4}\\].*")){
428
				author = author.replace("[", "").replace("]", "");
429
			}
430
			if (author.contains("?")){
431
				author = author.replace("H?bner", "H\u00fcbner");
432
				author = author.replace("Oberth?r", "Oberth\u00fcr");
433
			}
434
			
435
			parser.parseAuthors(zooName, author);
436
		} catch (StringNotParsableException e) {
437
			logger.warn("Author could not be parsed: " + author);
438
			zooName.setAuthorshipCache(author, true);
439
		}
440
	}
441

    
442
	/* (non-Javadoc)
443
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
444
	 */
445
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
446
		String nameSpace;
447
		Class cdmClass;
448
		Set<String> idSet;
449
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
450
		try{
451
			Set<String> taxonIdSet = new HashSet<String>();
452
			
453
			while (rs.next()){
454
//				handleForeignKey(rs, taxonIdSet, "taxonId");
455
			}
456
			
457
			//taxon map
458
			nameSpace = TAXON_NAMESPACE;
459
			cdmClass = Taxon.class;
460
			idSet = taxonIdSet;
461
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
462
			result.put(nameSpace, objectMap);
463

    
464
			
465
		} catch (SQLException e) {
466
			throw new RuntimeException(e);
467
		}
468
		return result;	}
469
	
470
	/* (non-Javadoc)
471
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
472
	 */
473
	@Override
474
	protected boolean doCheck(GlobisImportState state){
475
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
476
		return validator.validate(state);
477
	}
478
	
479
	
480
	/* (non-Javadoc)
481
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
482
	 */
483
	protected boolean isIgnore(GlobisImportState state){
484
		return ! state.getConfig().isDoCurrentTaxa();
485
	}
486

    
487

    
488

    
489

    
490

    
491
}
(1-1/9)