Project

General

Profile

Download (14.3 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.UTF8;
25
import eu.etaxonomy.cdm.io.common.IOValidator;
26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.Language;
30
import eu.etaxonomy.cdm.model.description.Distribution;
31
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.location.NamedArea;
34
import eu.etaxonomy.cdm.model.name.IZoologicalName;
35
import eu.etaxonomy.cdm.model.name.Rank;
36
import eu.etaxonomy.cdm.model.name.TaxonName;
37
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.taxon.Classification;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
43

    
44

    
/**
 * @author a.mueller
 * @since 20.02.2010
 */
49
@Component
50
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
51
	private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);
52

    
53
	private int modCount = 10000;
54
	private static final String pluralString = "current taxa";
55
	private static final String dbTableName = "current_species";
56
	private static final Class<?> cdmTargetClass = Taxon.class;  //not needed
57

    
58
	/**
	 * Creates the import for table "current_species" by handing the log label,
	 * the table name and the target class over to the super class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
61

    
62
	@Override
63
	protected String getIdQuery() {
64
		String strRecordQuery =
65
			" SELECT IDcurrentspec " +
66
			" FROM " + dbTableName;
67
		return strRecordQuery;
68
	}
69

    
70
	@Override
71
	protected String getRecordQuery(GlobisImportConfigurator config) {
72
		String strRecordQuery =
73
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
74
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
75
			" FROM " + getTableName() + " cs " +
76
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
77
		return strRecordQuery;
78
	}
79

    
80
	@Override
81
	public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
82
		boolean success = true;
83

    
84
		Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
85
		Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
86
		ResultSet rs = partitioner.getResultSet();
87

    
88
		Classification classification = getClassification(state);
89

    
90
		try {
91

    
92
			int i = 0;
93

    
94
			//for each reference
95
            while (rs.next()){
96

    
97
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
98

    
99
        		Integer taxonId = rs.getInt("IDcurrentspec");
100

    
101
        		//String dtSpcJahr -> ignore !
102
        		//empty: fiSpcLiteratur
103

    
104
        		//TODO
105
        		//fiSpcspcgrptax
106

    
107
				try {
108

    
109
					//source ref
110
					Reference sourceRef = state.getTransactionalSourceReference();
111
					Taxon nextHigherTaxon = null;
112

    
113
					boolean hasNewParent = false; //true if any parent is new
114

    
115
					//species
116
					Taxon species = createObject(rs, state, taxonId);
117

    
118

    
119
					String familyStr = rs.getString("dtSpcFamakt");
120
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
121
					String tribeStr = rs.getString("dtSpcTribakt");
122

    
123
					//family
124
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
125

    
126
					//subfamily
127
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
128
					Taxon subFamilyParent = getParent(subFamily, classification);
129
					if (subFamilyParent != null){
130
						if (! compareTaxa(family, subFamilyParent)){
131
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
132
						}
133
					}else{
134
						classification.addParentChild(family, subFamily, sourceRef, null);
135
					}
136
					nextHigherTaxon = subFamily;
137

    
138
					//tribe
139
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
140
					if (tribe != null){
141
						Taxon tribeParent = getParent(tribe, classification);
142
						if (tribeParent != null){
143
							if (! compareTaxa(subFamily, tribeParent)){
144
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
145
							}
146
						}else{
147
							classification.addParentChild(subFamily, tribe, sourceRef, null);
148
						}
149
						nextHigherTaxon = tribe;
150
					}
151

    
152

    
153
					//genus
154
					String genusStr = rs.getString("dtSpcGenusakt");
155
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
156
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
157
					Taxon genusParent = getParent(genus, classification);
158

    
159
					if (genusParent != null){
160
						if (! compareTaxa(genusParent, nextHigherTaxon)){
161
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
162
						}
163
					}else{
164
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
165
					}
166
					nextHigherTaxon = genus;
167

    
168
					//subgenus
169
					String subGenusStr = CdmBase.deproxy(species.getName(), TaxonName.class).getInfraGenericEpithet();
170
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
171
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
172
					if (hasSubgenus){
173
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
174
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
175
						nextHigherTaxon = subGenus;
176
					}
177

    
178
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
179

    
180
					handleCountries(state, rs, species, taxonId);
181

    
182
					//common names -> not used anymore
183
					handleCommonNames(state, rs, species);
184

    
185
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
186

    
187
					objectsToSave.add(species);
188

    
189

    
190
				} catch (Exception e) {
191
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
192
					e.printStackTrace();
193
				}
194

    
195
            }
196

    
197
			logger.warn(pluralString + " to save: " + objectsToSave.size());
198
			getTaxonService().save(objectsToSave);
199

    
200
			return success;
201
		} catch (SQLException e) {
202
			logger.error("SQLException:" +  e);
203
			return false;
204
		}
205
	}
206

    
207
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species, Integer taxonId) throws SQLException {
208
		String countriesStr = rs.getString("dtSpcCountries");
209
		if (isBlank(countriesStr)){
210
			return;
211
		}
212
		String[] countriesSplit = countriesStr.split(";");
213
		for (String countryStr : countriesSplit){
214
			if (isBlank(countryStr)){
215
				continue;
216
			}
217
			countryStr = countryStr.trim();
218

    
219
			//TODO use isComplete
220
			boolean isComplete = countryStr.endsWith(".");
221
			if (isComplete){
222
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
223
			}
224
			boolean isDoubtful = countryStr.endsWith("[?]");
225
			if (isDoubtful){
226
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
227
			}
228
			if (countryStr.startsWith("?")){
229
				isDoubtful = true;
230
				countryStr = countryStr.substring(1).trim();
231
			}
232

    
233

    
234

    
235
			countryStr = normalizeCountry(countryStr);
236

    
237
			NamedArea country = getCountry(state, countryStr);
238

    
239
			PresenceAbsenceTerm status;
240
			if (isDoubtful){
241
				status = PresenceAbsenceTerm.PRESENT_DOUBTFULLY();
242
			}else{
243
				status = PresenceAbsenceTerm.PRESENT();
244
			}
245

    
246
			if (country != null){
247
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
248
				Distribution distribution = Distribution.NewInstance(country, status);
249
				desc.addElement(distribution);
250
			}else{
251
				if (countryStr.length() > 0){
252
					logger.warn("Country string not recognized : " + countryStr + " for IDcurrentspec " + taxonId);
253
				}
254
			}
255
		}
256
	}
257

    
258

    
259

    
260
	/**
261
	 * @param countryStr
262
	 * @return
263
	 */
264
	private String normalizeCountry(String countryStr) {
265
		String result = countryStr.trim();
266
		if (result.endsWith(".")){
267
			result = result.substring(0,result.length() - 1);
268
		}
269
		while (result.startsWith(UTF8.NO_BREAK_SPACE.toString())){
270
			result = result.substring(1);  //
271
		}
272
		if (result.matches("\\s+")){
273
			result = "";
274
		}
275
		return result.trim();
276
	}
277

    
278
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
279
		//DON't use, use seperate common name tables instead
280

    
281
//		String commonNamesStr = rs.getString("vernacularnames");
282
//		if (isBlank(commonNamesStr)){
283
//			return;
284
//		}
285
//		String[] commonNamesSplit = commonNamesStr.split(";");
286
//		for (String commonNameStr : commonNamesSplit){
287
//			if (isBlank(commonNameStr)){
288
//				continue;
289
//			}
290
//			Language language = null; //TODO
291
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
292
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
293
//			desc.addElement(commonName);
294
//		}
295
	}
296

    
297

    
298

    
299

    
300
	/**
301
	 * Compares 2 taxa, returns true of both taxa look similar
302
	 * @param genus
303
	 * @param nextHigherTaxon
304
	 * @return
305
	 */
306
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
307
		IZoologicalName name1 = taxon1.getName();
308
		IZoologicalName name2 = taxon2.getName();
309
		if (!name1.getRank().equals(name2.getRank())){
310
			return false;
311
		}
312
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
313
			return false;
314
		}
315
		return true;
316
	}
317

    
318

    
319

    
320

    
321
	private Taxon getParent(Taxon child, Classification classification) {
322
		if (child == null){
323
			logger.warn("Child is null");
324
			return null;
325
		}
326
		for (TaxonNode node :  child.getTaxonNodes()){
327
			if (node.getClassification().equals(classification)){
328
				if (node.getParent() != null){
329
					return node.getParent().getTaxon();
330
				}else{
331
					return null;
332
				}
333
			}
334
		}
335
		return null;
336
	}
337

    
338

    
339

    
340

    
341
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
342
		if (isBlank(uninomial)){
343
			return null;
344
		}
345

    
346
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
347

    
348
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
349
		Taxon taxon = taxonMap.get(key);
350
		if (taxon == null){
351
			IZoologicalName name = TaxonNameFactory.NewZoologicalInstance(rank);
352
			name.setGenusOrUninomial(uninomial);
353
			if (isNotBlank(infraGenericEpi)){
354
				name.setInfraGenericEpithet(infraGenericEpi);
355
			}
356
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
357

    
358
			taxonMap.put(key, taxon);
359
			handleAuthorAndYear(author, name, taxonId, state);
360
			getTaxonService().save(taxon);
361
		}
362

    
363
		return taxon;
364
	}
365

    
366

    
367
	//fast and dirty is enough here
368
	private Classification classification;
369

    
370
	private Classification getClassification(GlobisImportState state) {
371
		if (this.classification == null){
372
			String name = state.getConfig().getClassificationName();
373
			Reference reference = state.getTransactionalSourceReference();
374
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
375
			classification.setUuid(state.getConfig().getClassificationUuid());
376
			getClassificationService().save(classification);
377
		}
378
		return this.classification;
379

    
380
	}
381

    
382
	/* (non-Javadoc)
383
	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
384
	 */
385
	public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
386
			throws SQLException {
387
		String speciesEpi = rs.getString("dtSpcSpcakt");
388
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
389
		String genusEpi = rs.getString("dtSpcGenusakt");
390
		String author = rs.getString("dtSpcAutor");
391

    
392

    
393
		IZoologicalName zooName = TaxonNameFactory.NewZoologicalInstance(Rank.SPECIES());
394
		zooName.setSpecificEpithet(speciesEpi);
395
		if (StringUtils.isNotBlank(subGenusEpi)){
396
			zooName.setInfraGenericEpithet(subGenusEpi);
397
		}
398
		zooName.setGenusOrUninomial(genusEpi);
399
		handleAuthorAndYear(author, zooName, taxonId, state);
400

    
401
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
402

    
403
		return taxon;
404
	}
405

    
406

    
407

    
408

    
409
	@Override
410
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
411
		String nameSpace;
412
		Class cdmClass;
413
		Set<String> idSet;
414
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
415
		try{
416
			Set<String> taxonIdSet = new HashSet<String>();
417

    
418
			while (rs.next()){
419
//				handleForeignKey(rs, taxonIdSet, "taxonId");
420
			}
421

    
422
			//taxon map
423
			nameSpace = TAXON_NAMESPACE;
424
			cdmClass = Taxon.class;
425
			idSet = taxonIdSet;
426
			Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
427
			result.put(nameSpace, objectMap);
428

    
429

    
430
		} catch (SQLException e) {
431
			throw new RuntimeException(e);
432
		}
433
		return result;
434
	}
435

    
436
	/* (non-Javadoc)
437
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
438
	 */
439
	@Override
440
	protected boolean doCheck(GlobisImportState state){
441
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
442
		return validator.validate(state);
443
	}
444

    
445

    
446
	/* (non-Javadoc)
447
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
448
	 */
449
	@Override
450
    protected boolean isIgnore(GlobisImportState state){
451
		return ! state.getConfig().isDoCurrentTaxa();
452
	}
453

    
454

    
455

    
456

    
457

    
458
}
(3-3/10)