Project

General

Profile

Download (14 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.algaterra;
11

    
12
import java.net.URI;
13
import java.sql.ResultSet;
14
import java.sql.SQLException;
15
import java.text.ParseException;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.Locale;
19
import java.util.Map;
20
import java.util.Set;
21

    
22
import org.apache.commons.lang.StringUtils;
23
import org.apache.log4j.Logger;
24
import org.joda.time.DateTime;
25
import org.joda.time.format.DateTimeFormat;
26
import org.joda.time.format.DateTimeFormatter;
27
import org.springframework.format.datetime.joda.DateTimeParser;
28
import org.springframework.stereotype.Component;
29

    
30
import eu.etaxonomy.cdm.io.algaterra.validation.AlgaTerraDnaImportValidator;
31
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator;
32
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
33
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
34
import eu.etaxonomy.cdm.io.common.IOValidator;
35
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
36
import eu.etaxonomy.cdm.model.common.Annotation;
37
import eu.etaxonomy.cdm.model.common.AnnotationType;
38
import eu.etaxonomy.cdm.model.common.CdmBase;
39
import eu.etaxonomy.cdm.model.common.Language;
40
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
41
import eu.etaxonomy.cdm.model.description.TaxonDescription;
42
import eu.etaxonomy.cdm.model.molecular.DnaSample;
43
import eu.etaxonomy.cdm.model.molecular.Sequence;
44
import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
45
import eu.etaxonomy.cdm.model.occurrence.DerivationEventType;
46
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
47
import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
48
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
49
import eu.etaxonomy.cdm.model.reference.Reference;
50
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
51
import eu.etaxonomy.cdm.model.taxon.Taxon;
52
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
53
import eu.etaxonomy.cdm.model.term.DefinedTerm;
54

    
55

    
56
/**
57
 * @author a.mueller
58
 * @since 01.09.2012
59
 */
60
@Component
61
public class AlgaTerraDnaImport  extends AlgaTerraSpecimenImportBase {
62
	private static final Logger logger = Logger.getLogger(AlgaTerraDnaImport.class);
63

    
64

    
65
	private static int modCount = 5000;
66
	private static final String pluralString = "dna facts";
67
	private static final String dbTableName = "DNAFact";  //??
68

    
69

    
70
	/**
	 * Creates the DNA import step and hands the table name and the plural
	 * record label to the base class.
	 */
	public AlgaTerraDnaImport() {
		super(dbTableName, pluralString);
	}
73

    
74

    
75
	@Override
76
	protected String getIdQuery(BerlinModelImportState bmState) {
77
		AlgaTerraImportState state = (AlgaTerraImportState)bmState;
78
		String result = " SELECT df.DNAFactId " +
79
				" FROM DNAFact df " +
80
					" INNER JOIN Fact f ON  f.ExtensionFk = df.DNAFactID " +
81
					" WHERE f.FactCategoryFk = 203 ";
82
		if (state.getAlgaTerraConfigurator().isRemoveRestricted()){
83
				result = result + " AND df.ProtectedFlag = 0 ";
84
				logger.warn("DNA with protectedFlag = 0 is currently not imported");
85
		}
86
		result += " ORDER BY df.DNAFactID ";
87
		return result;
88
	}
89

    
90
	/* (non-Javadoc)
91
	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
92
	 */
93
	@Override
94
	protected String getRecordQuery(BerlinModelImportConfigurator config) {
95
			String strQuery =
96
	            " SELECT df.*, pt.RIdentifier as taxonId, f.FactId, f.restrictedFlag, ecoFact.ecoFactId as ecoFactId " +
97
	            " FROM DNAFact df INNER JOIN Fact f ON  f.ExtensionFk = df.DNAFactID " +
98
	            	" LEFT OUTER JOIN PTaxon pt ON f.PTNameFk = pt.PTNameFk AND f.PTRefFk = pt.PTRefFk " +
99
	            	" LEFT OUTER JOIN EcoFact ecoFact ON ecoFact.CultureStrain = df.CultureStrainNo " +
100
	              " WHERE f.FactCategoryFk = 203 AND (df.DNAFactId IN (" + ID_LIST_TOKEN + ")  )"
101
	            + " ORDER BY DNAFactID "
102
            ;
103
		return strQuery;
104
	}
105

    
106
	@Override
107
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState bmState) {
108
		boolean success = true;
109

    
110
		AlgaTerraImportState state = (AlgaTerraImportState)bmState;
111
		try {
112
//			makeVocabulariesAndFeatures(state);
113
		} catch (Exception e1) {
114
			logger.warn("Exception occurred when trying to create Ecofact vocabularies: " + e1.getMessage());
115
			e1.printStackTrace();
116
		}
117
		Set<SpecimenOrObservationBase> samplesToSave = new HashSet<SpecimenOrObservationBase>();
118
		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
119

    
120
		Map<String, FieldUnit> ecoFactFieldObservationMap = partitioner.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE);
121

    
122
		ResultSet rs = partitioner.getResultSet();
123

    
124
		Map<String, Reference> referenceMap = new HashMap<String, Reference>();
125

    
126

    
127
		try {
128

    
129
			int i = 0;
130

    
131
			//for each reference
132
            while (rs.next()){
133

    
134
        		if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
135

    
136
				int dnaFactId = rs.getInt("DNAFactId");
137
				String keywordsStr = rs.getString("Keywords");
138
				String locusStr = rs.getString("Locus");
139
				String definitionStr = rs.getString("Definition");
140

    
141

    
142
				try {
143

    
144
					//source ref
145
					Reference sourceRef = state.getTransactionalSourceReference();
146

    
147
					//import date
148
					DateTime importDateTime = makeImportDateTime(rs);
149

    
150
					//DNA Sample
151
					DnaSample dnaSample = DnaSample.NewInstance();
152
					dnaSample.setCreated(importDateTime);
153

    
154
					//ecoFactFk
155
					makeDerivationFromEcoFact(state, rs, dnaSample, samplesToSave, dnaFactId);
156

    
157
					//sequence
158
					Sequence sequence = makeSequence(rs, dnaSample, dnaFactId, importDateTime);
159

    
160
					//locus
161
					//FIXME Deduplicate DnaMarker
162
					DefinedTerm locus = DefinedTerm.NewDnaMarkerInstance(definitionStr, keywordsStr, null);
163
					locus.setCreated(importDateTime);
164
					this.getTermService().save(locus);
165

    
166
					sequence.setDnaMarker(locus);
167

    
168
					//GenBank Accession
169
					makeGenBankAccession(rs, sequence, importDateTime, dnaFactId);
170

    
171
					//Comment
172
					String commentStr = rs.getString("Comment");
173
					if (isNotBlank(commentStr)){
174
						Annotation annotation = Annotation.NewInstance(commentStr, AnnotationType.EDITORIAL(), Language.DEFAULT());
175
						annotation.setCreated(importDateTime);
176
						sequence.addAnnotation(annotation);
177
					}
178

    
179
					//Indiv.Assoc.
180
					makeIndividualsAssociation(partitioner, rs, state, taxaToSave, dnaSample);
181

    
182
					//TODO titleCache
183
					//prelim implementation:
184
					String cultStrain = rs.getString("CultureStrainNo");
185
					String title = String.format("DNA Sample for %s at %s", cultStrain, keywordsStr);
186
					dnaSample.setTitleCache(title, true);
187

    
188
					//TODO preliminary implementation
189
					String referenceStr = rs.getString("FactReference");
190
					if (isNotBlank(referenceStr)){
191
						Reference ref = referenceMap.get(referenceStr);
192
						if (ref == null){
193
							ref = ReferenceFactory.newGeneric();
194
							ref.setTitleCache(referenceStr, true);
195
							referenceMap.put(referenceStr, ref);
196
						}
197
						sequence.addCitation(ref);
198
					}
199

    
200
					//save
201
					samplesToSave.add(dnaSample);
202

    
203

    
204
				} catch (Exception e) {
205
					logger.warn("Exception in ecoFact: ecoFactId " + dnaFactId + ". " + e.getMessage());
206
					e.printStackTrace();
207
				}
208

    
209
            }
210

    
211
			logger.warn("DNASample or EcoFacts to save: " + samplesToSave.size());
212
			getOccurrenceService().saveOrUpdate(samplesToSave);
213
			logger.warn("Taxa to save: " + samplesToSave.size());
214
			getTaxonService().saveOrUpdate(taxaToSave);
215

    
216
			return success;
217
		} catch (SQLException e) {
218
			logger.error("SQLException:" +  e);
219
			return false;
220
		}
221
	}
222

    
223

    
224
	private void makeDerivationFromEcoFact(AlgaTerraImportState state, ResultSet rs, DnaSample dnaSample, Set<SpecimenOrObservationBase> samplesToSave, Integer dnaFactId) throws SQLException {
225
		Integer ecoFactFk = nullSafeInt(rs, "ecoFactId");
226
		if (ecoFactFk != null){
227

    
228
			DerivedUnit ecoFact = (DerivedUnit)state.getRelatedObject(ECO_FACT_DERIVED_UNIT_NAMESPACE, ecoFactFk.toString());
229
			if (ecoFact == null){
230
				logger.warn("EcoFact is null for ecoFactFk: " + ecoFactFk + ", DnaFactId: " + dnaFactId);
231
			}else{
232
				DerivationEvent.NewSimpleInstance(ecoFact, dnaSample, DerivationEventType.DNA_EXTRACTION());
233
				samplesToSave.add(ecoFact);
234
			}
235
		}
236
	}
237

    
238

    
239

    
240
	private void makeIndividualsAssociation(ResultSetPartitioner partitioner, ResultSet rs, AlgaTerraImportState state, Set<TaxonBase> taxaToSave, DnaSample dnaSample) throws SQLException{
241
		Reference sourceRef = state.getTransactionalSourceReference();
242
		Map<String, TaxonBase> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
243
		Integer taxonId = rs.getInt("taxonId");
244
		Integer factId = rs.getInt("factId");
245
		Taxon taxon = getTaxon(state, taxonId, taxonMap, factId);
246
		TaxonDescription desc = getTaxonDescription(state, taxon, sourceRef);
247
		IndividualsAssociation assoc = IndividualsAssociation.NewInstance(dnaSample);
248
		desc.addElement(assoc);
249
		taxaToSave.add(taxon);
250
	}
251

    
252

    
253
	/**
254
	 * @param rs
255
	 * @return
256
	 * @throws SQLException
257
	 * @throws ParseException
258
	 */
259
	private DateTime makeImportDateTime(ResultSet rs) throws SQLException,
260
			ParseException {
261
		DateTime importDateTime = null;
262
		String importDateTimeStr = rs.getString("ImportDateTime");
263
		if (isNotBlank(importDateTimeStr)){
264
			importDateTimeStr = importDateTimeStr.substring(0,10);
265
			DateTimeFormatter dayFormatter = DateTimeFormat.forPattern("dd.MM.yyyy");
266

    
267
//						DateTimeFormatter formatter = new DateTimeFormatterBuilder().
268
//								append;
269
			DateTimeParser p = new DateTimeParser(dayFormatter);
270
			importDateTime = p.parse(importDateTimeStr, Locale.GERMANY);
271

    
272
		}
273
		return importDateTime;
274
	}
275

    
276

    
277

    
278
	private Sequence makeSequence(ResultSet rs, DnaSample dnaSample, int dnaFactId, DateTime importDateTime) throws SQLException {
279
		String sequenceStr = rs.getString("PlainSequence");
280
		Integer seqLen = nullSafeInt(rs, "SeqLen");
281

    
282
		if (sequenceStr == null){
283
			logger.warn("PlainSequence is null. Id: " + dnaFactId);
284
		}else{
285
			if (seqLen == null){
286
				logger.warn("SeqLen is null for dnaFact: "  + dnaFactId);
287
			}else if (sequenceStr.length() != seqLen){
288
				logger.warn("SeqLen (" + seqLen+ ") and OriginalLen ("+sequenceStr.length()+") differ for dnaFact: "  + dnaFactId);
289
			}
290
		}
291

    
292
		Sequence sequence = Sequence.NewInstance(sequenceStr, seqLen);
293
		sequence.setCreated(importDateTime);
294
		dnaSample.addSequence(sequence);
295
		return sequence;
296
	}
297

    
298

    
299

    
300
	/**
301
	 * @param sequence2
302
	 * @param rs
303
	 * @param accessionStr
304
	 * @param notesStr
305
	 * @param sequence
306
	 * @param importDateTime
307
	 * @return
308
	 * @throws SQLException
309
	 */
310
	private void makeGenBankAccession(ResultSet rs, Sequence sequence, DateTime importDateTime, Integer dnaFactId) throws SQLException {
311
		String accessionStr = rs.getString("Accession");
312
		String notesStr = rs.getString("Notes");
313
		String versionStr = rs.getString("Version");
314

    
315
		URI genBankUri = null;
316
		if (StringUtils.isNotBlank(notesStr)){
317
			if (notesStr.startsWith("http")){
318
				genBankUri = URI.create(notesStr);
319
			}else{
320
				logger.warn("Notes do not start with URI: " +  notesStr);
321
			}
322
		}
323

    
324
		if (isNotBlank(accessionStr) || genBankUri != null){
325
			if (accessionStr != null && accessionStr.trim().equals("")){
326
				accessionStr = null;
327
			}
328
			if (isGenBankAccessionNumber(accessionStr, versionStr, genBankUri, dnaFactId) || genBankUri != null){
329
				sequence.setGeneticAccessionNumber(accessionStr);
330
			}
331
		}
332
	}
333

    
334
	private boolean isGenBankAccessionNumber(String accessionStr, String versionStr, URI genBankUri, Integer dnaFactId) {
335
		boolean isGenBankAccessionNumber = accessionStr.matches("[A-Z]{2}\\d{6}");
336
		boolean versionHasGenBankPart = versionStr.matches(".*GI:.*");
337
		if (isGenBankAccessionNumber && versionHasGenBankPart){
338
			return true;
339
		}else {
340
			if (genBankUri != null){
341
				logger.warn("GenBank Uri exists but accession or version have been identified to use GenBank syntax. DNAFactID: " + dnaFactId);
342
			}
343
			if(isGenBankAccessionNumber || versionHasGenBankPart){
344
				logger.warn("Either accession ("+ accessionStr +") or version ("+versionStr+") use GenBank syntax but the other does not. DNAFactID: " + dnaFactId);
345
			}
346
			return false;
347
		}
348
	}
349

    
350

    
351

    
352
	@Override
353
    protected String getDerivedUnitNameSpace(){
354
		return ECO_FACT_DERIVED_UNIT_NAMESPACE;
355
	}
356

    
357
	@Override
358
    protected String getFieldObservationNameSpace(){
359
		return ECO_FACT_FIELD_OBSERVATION_NAMESPACE;
360
	}
361

    
362
	@Override
363
	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
364
		String nameSpace;
365
		Set<String> idSet;
366
		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
367

    
368
		try{
369
			Set<String> taxonIdSet = new HashSet<>();
370
			Set<String> ecoFactFkSet = new HashSet<>();
371
			while (rs.next()){
372
				handleForeignKey(rs, taxonIdSet, "taxonId");
373
				handleForeignKey(rs, ecoFactFkSet, "ecoFactId");
374
			}
375

    
376
			//taxon map
377
			nameSpace = BerlinModelTaxonImport.NAMESPACE;
378
			idSet = taxonIdSet;
379
			Map<String, TaxonBase> objectMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonBase.class, idSet, nameSpace);
380
			result.put(nameSpace, objectMap);
381

    
382

    
383
			//eco fact derived unit map
384
			nameSpace = AlgaTerraSpecimenImportBase.ECO_FACT_DERIVED_UNIT_NAMESPACE;
385
			idSet = ecoFactFkSet;
386
			Map<String, DerivedUnit> derivedUnitMap = getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit.class, idSet, nameSpace);
387
			result.put(nameSpace, derivedUnitMap);
388

    
389
		} catch (SQLException e) {
390
			throw new RuntimeException(e);
391
		}
392
		return result;
393
	}
394

    
395
	@Override
396
	protected boolean doCheck(BerlinModelImportState state){
397
		IOValidator<BerlinModelImportState> validator = new AlgaTerraDnaImportValidator();
398
		return validator.validate(state);
399
	}
400

    
401
	@Override
402
	protected boolean isIgnore(BerlinModelImportState state){
403
		return ! ((AlgaTerraImportState)state).getAlgaTerraConfigurator().isDoDna();
404
	}
405

    
406
}
(2-2/15)