Project

General

Profile

Download (13.8 KB) Statistics
| Branch: | Revision:
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.io.globis;
11

    
12
import java.sql.ResultSet;
13
import java.sql.SQLException;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.UTF8;
25
import eu.etaxonomy.cdm.io.common.IOValidator;
26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.Language;
30
import eu.etaxonomy.cdm.model.description.Distribution;
31
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.location.NamedArea;
34
import eu.etaxonomy.cdm.model.name.IZoologicalName;
35
import eu.etaxonomy.cdm.model.name.Rank;
36
import eu.etaxonomy.cdm.model.name.TaxonName;
37
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.model.taxon.Classification;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
43

    
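/**
 * Imports the records of the Globis <code>current_species</code> table as {@link Taxon} objects
 * and links them into the import classification below their family, subfamily, tribe, genus
 * and subgenus.
 *
 * @author a.mueller
 * @since 20.02.2010
 */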
48
@Component
49
public class GlobisCurrentSpeciesImport  extends GlobisImportBase<Taxon> {
50

    
51
    /** Serialization version id of this import class. */
    private static final long serialVersionUID = -4392659482520384118L;
    /** Logger of this import class. */
    private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

	/** A progress message is logged each time this many records have been handled. */
	private int modCount = 10000;
	/** Label for the imported objects, used in log messages and passed to the base class. */
	private static final String pluralString = "current taxa";
	/** Name of the source table, used in the id query and passed to the base class. */
	private static final String dbTableName = "current_species";
	/** Target class passed to the base class constructor. */
	private static final Class<?> cdmTargetClass = Taxon.class;  //not needed

	/**
	 * Creates the import and hands label, source table name and target class
	 * over to the base class.
	 */
	public GlobisCurrentSpeciesImport(){
		super(pluralString, dbTableName, cdmTargetClass);
	}
62

    
63
	@Override
64
	protected String getIdQuery() {
65
		String strRecordQuery =
66
			" SELECT IDcurrentspec " +
67
			" FROM " + dbTableName;
68
		return strRecordQuery;
69
	}
70

    
71
	@Override
72
	protected String getRecordQuery(GlobisImportConfigurator config) {
73
		String strRecordQuery =
74
			" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
75
				"  cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
76
			" FROM " + getTableName() + " cs " +
77
			" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
78
		return strRecordQuery;
79
	}
80

    
81
	@Override
82
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
83
		boolean success = true;
84

    
85
		@SuppressWarnings("rawtypes")
86
        Set<TaxonBase> objectsToSave = new HashSet<>();
87
		@SuppressWarnings("unchecked")
88
        Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
89
		ResultSet rs = partitioner.getResultSet();
90

    
91
		Classification classification = getClassification(state);
92

    
93
		try {
94

    
95
			int i = 0;
96

    
97
			//for each reference
98
            while (rs.next()){
99

    
100
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
101

    
102
        		Integer taxonId = rs.getInt("IDcurrentspec");
103

    
104
        		//String dtSpcJahr -> ignore !
105
        		//empty: fiSpcLiteratur
106

    
107
        		//TODO
108
        		//fiSpcspcgrptax
109

    
110
				try {
111

    
112
					//source ref
113
					Reference sourceRef = state.getTransactionalSourceReference();
114
					Taxon nextHigherTaxon = null;
115

    
116
					boolean hasNewParent = false; //true if any parent is new
117

    
118
					//species
119
					Taxon species = createObject(rs, state, taxonId);
120

    
121

    
122
					String familyStr = rs.getString("dtSpcFamakt");
123
					String subFamilyStr = rs.getString("dtSpcSubfamakt");
124
					String tribeStr = rs.getString("dtSpcTribakt");
125

    
126
					//family
127
					Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
128

    
129
					//subfamily
130
					Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
131
					Taxon subFamilyParent = getParent(subFamily, classification);
132
					if (subFamilyParent != null){
133
						if (! compareTaxa(family, subFamilyParent)){
134
							logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
135
						}
136
					}else{
137
						classification.addParentChild(family, subFamily, sourceRef, null);
138
					}
139
					nextHigherTaxon = subFamily;
140

    
141
					//tribe
142
					Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
143
					if (tribe != null){
144
						Taxon tribeParent = getParent(tribe, classification);
145
						if (tribeParent != null){
146
							if (! compareTaxa(subFamily, tribeParent)){
147
								logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
148
							}
149
						}else{
150
							classification.addParentChild(subFamily, tribe, sourceRef, null);
151
						}
152
						nextHigherTaxon = tribe;
153
					}
154

    
155

    
156
					//genus
157
					String genusStr = rs.getString("dtSpcGenusakt");
158
					String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
159
					Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
160
					Taxon genusParent = getParent(genus, classification);
161

    
162
					if (genusParent != null){
163
						if (! compareTaxa(genusParent, nextHigherTaxon)){
164
							logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
165
						}
166
					}else{
167
						classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
168
					}
169
					nextHigherTaxon = genus;
170

    
171
					//subgenus
172
					String subGenusStr = CdmBase.deproxy(species.getName(), TaxonName.class).getInfraGenericEpithet();
173
					String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
174
					boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
175
					if (hasSubgenus){
176
						Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
177
						classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
178
						nextHigherTaxon = subGenus;
179
					}
180

    
181
					classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
182

    
183
					handleCountries(state, rs, species, taxonId);
184

    
185
					//common names -> not used anymore
186
					handleCommonNames(state, rs, species);
187

    
188
					this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
189

    
190
					objectsToSave.add(species);
191

    
192

    
193
				} catch (Exception e) {
194
					logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
195
					e.printStackTrace();
196
				}
197

    
198
            }
199

    
200
			logger.warn(pluralString + " to save: " + objectsToSave.size());
201
			getTaxonService().save(objectsToSave);
202

    
203
			return success;
204
		} catch (SQLException e) {
205
			logger.error("SQLException:" +  e);
206
			return false;
207
		}
208
	}
209

    
210
	private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species, Integer taxonId) throws SQLException {
211
		String countriesStr = rs.getString("dtSpcCountries");
212
		if (isBlank(countriesStr)){
213
			return;
214
		}
215
		String[] countriesSplit = countriesStr.split(";");
216
		for (String countryStr : countriesSplit){
217
			if (isBlank(countryStr)){
218
				continue;
219
			}
220
			countryStr = countryStr.trim();
221

    
222
			//TODO use isComplete
223
			boolean isComplete = countryStr.endsWith(".");
224
			if (isComplete){
225
				countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
226
			}
227
			boolean isDoubtful = countryStr.endsWith("[?]");
228
			if (isDoubtful){
229
				countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
230
			}
231
			if (countryStr.startsWith("?")){
232
				isDoubtful = true;
233
				countryStr = countryStr.substring(1).trim();
234
			}
235

    
236
			countryStr = normalizeCountry(countryStr);
237

    
238
			NamedArea country = getCountry(state, countryStr);
239

    
240
			PresenceAbsenceTerm status;
241
			if (isDoubtful){
242
				status = PresenceAbsenceTerm.PRESENT_DOUBTFULLY();
243
			}else{
244
				status = PresenceAbsenceTerm.PRESENT();
245
			}
246

    
247
			if (country != null){
248
				TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
249
				Distribution distribution = Distribution.NewInstance(country, status);
250
				desc.addElement(distribution);
251
			}else{
252
				if (countryStr.length() > 0){
253
					logger.warn("Country string not recognized : " + countryStr + " for IDcurrentspec " + taxonId);
254
				}
255
			}
256
		}
257
	}
258

    
259
	private String normalizeCountry(String countryStr) {
260
		String result = countryStr.trim();
261
		if (result.endsWith(".")){
262
			result = result.substring(0,result.length() - 1);
263
		}
264
		while (result.startsWith(UTF8.NO_BREAK_SPACE.toString())){
265
			result = result.substring(1);  //
266
		}
267
		if (result.matches("\\s+")){
268
			result = "";
269
		}
270
		return result.trim();
271
	}
272

    
273
	private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
274
		//DON't use, use seperate common name tables instead
275

    
276
//		String commonNamesStr = rs.getString("vernacularnames");
277
//		if (isBlank(commonNamesStr)){
278
//			return;
279
//		}
280
//		String[] commonNamesSplit = commonNamesStr.split(";");
281
//		for (String commonNameStr : commonNamesSplit){
282
//			if (isBlank(commonNameStr)){
283
//				continue;
284
//			}
285
//			Language language = null; //TODO
286
//			CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
287
//			TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
288
//			desc.addElement(commonName);
289
//		}
290
	}
291

    
292
	/**
293
	 * Compares 2 taxa, returns true of both taxa look similar
294
	 * @param genus
295
	 * @param nextHigherTaxon
296
	 * @return
297
	 */
298
	private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
299
		IZoologicalName name1 = taxon1.getName();
300
		IZoologicalName name2 = taxon2.getName();
301
		if (!name1.getRank().equals(name2.getRank())){
302
			return false;
303
		}
304
		if (! name1.getTitleCache().equals(name2.getTitleCache())){
305
			return false;
306
		}
307
		return true;
308
	}
309

    
310
	private Taxon getParent(Taxon child, Classification classification) {
311
		if (child == null){
312
			logger.warn("Child is null");
313
			return null;
314
		}
315
		for (TaxonNode node :  child.getTaxonNodes()){
316
			if (node.getClassification().equals(classification)){
317
				if (node.getParent() != null){
318
					return node.getParent().getTaxon();
319
				}else{
320
					return null;
321
				}
322
			}
323
		}
324
		return null;
325
	}
326

    
327
	private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
328
		if (isBlank(uninomial)){
329
			return null;
330
		}
331

    
332
		String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
333

    
334
		String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
335
		Taxon taxon = taxonMap.get(key);
336
		if (taxon == null){
337
			IZoologicalName name = TaxonNameFactory.NewZoologicalInstance(rank);
338
			name.setGenusOrUninomial(uninomial);
339
			if (isNotBlank(infraGenericEpi)){
340
				name.setInfraGenericEpithet(infraGenericEpi);
341
			}
342
			taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
343

    
344
			taxonMap.put(key, taxon);
345
			handleAuthorAndYear(author, name, taxonId, state);
346
			getTaxonService().save(taxon);
347
		}
348

    
349
		return taxon;
350
	}
351

    
352

    
353
	//fast and dirty is enough here
354
	private Classification classification;
355

    
356
	private Classification getClassification(GlobisImportState state) {
357
		if (this.classification == null){
358
			String name = state.getConfig().getClassificationName();
359
			Reference reference = state.getTransactionalSourceReference();
360
			this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
361
			classification.setUuid(state.getConfig().getClassificationUuid());
362
			getClassificationService().save(classification);
363
		}
364
		return this.classification;
365

    
366
	}
367

    
368
	public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
369
			throws SQLException {
370
		String speciesEpi = rs.getString("dtSpcSpcakt");
371
		String subGenusEpi = rs.getString("dtSpcSubgenakt");
372
		String genusEpi = rs.getString("dtSpcGenusakt");
373
		String author = rs.getString("dtSpcAutor");
374

    
375

    
376
		IZoologicalName zooName = TaxonNameFactory.NewZoologicalInstance(Rank.SPECIES());
377
		zooName.setSpecificEpithet(speciesEpi);
378
		if (StringUtils.isNotBlank(subGenusEpi)){
379
			zooName.setInfraGenericEpithet(subGenusEpi);
380
		}
381
		zooName.setGenusOrUninomial(genusEpi);
382
		handleAuthorAndYear(author, zooName, taxonId, state);
383

    
384
		Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
385

    
386
		return taxon;
387
	}
388

    
389
	@Override
390
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
391

    
392
	    String nameSpace;
393
		Set<String> idSet;
394
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
395
		try{
396
			Set<String> taxonIdSet = new HashSet<>();
397

    
398
			while (rs.next()){
399
//				handleForeignKey(rs, taxonIdSet, "taxonId");
400
			}
401

    
402
			//taxon map
403
			nameSpace = TAXON_NAMESPACE;
404
			idSet = taxonIdSet;
405
			Map<String, Taxon> objectMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
406
			result.put(nameSpace, objectMap);
407

    
408

    
409
		} catch (SQLException e) {
410
			throw new RuntimeException(e);
411
		}
412
		return result;
413
	}
414

    
415
	@Override
416
	protected boolean doCheck(GlobisImportState state){
417
		IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
418
		return validator.validate(state);
419
	}
420

    
421
	@Override
422
    protected boolean isIgnore(GlobisImportState state){
423
		return ! state.getConfig().isDoCurrentTaxa();
424
	}
425

    
426

    
427

    
428

    
429

    
430
}
(3-3/10)