Revision 8422c0cd
Added by Andreas Müller almost 8 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/algaterra/AlgaTerraDnaImport.java | ||
---|---|---|
1 | 1 |
/** |
2 | 2 |
* Copyright (C) 2007 EDIT |
3 |
* European Distributed Institute of Taxonomy
|
|
3 |
* European Distributed Institute of Taxonomy |
|
4 | 4 |
* http://www.e-taxonomy.eu |
5 |
*
|
|
5 |
* |
|
6 | 6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
7 | 7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
8 | 8 |
*/ |
... | ... | |
61 | 61 |
public class AlgaTerraDnaImport extends AlgaTerraSpecimenImportBase { |
62 | 62 |
private static final Logger logger = Logger.getLogger(AlgaTerraDnaImport.class); |
63 | 63 |
|
64 |
|
|
64 |
|
|
65 | 65 |
private static int modCount = 5000; |
66 | 66 |
private static final String pluralString = "dna facts"; |
67 |
private static final String dbTableName = "DNAFact"; //??
|
|
67 |
private static final String dbTableName = "DNAFact"; //?? |
|
68 | 68 |
|
69 | 69 |
|
70 | 70 |
public AlgaTerraDnaImport(){ |
71 | 71 |
super(dbTableName, pluralString); |
72 | 72 |
} |
73 |
|
|
74 |
|
|
75 |
|
|
73 |
|
|
74 |
|
|
75 |
|
|
76 | 76 |
/* (non-Javadoc) |
77 | 77 |
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery() |
78 | 78 |
*/ |
79 | 79 |
@Override |
80 | 80 |
protected String getIdQuery(BerlinModelImportState bmState) { |
81 | 81 |
AlgaTerraImportState state = (AlgaTerraImportState)bmState; |
82 |
String result = " SELECT df.DNAFactId " +
|
|
82 |
String result = " SELECT df.DNAFactId " + |
|
83 | 83 |
" FROM DNAFact df " + |
84 | 84 |
" INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " + |
85 | 85 |
" WHERE f.FactCategoryFk = 203 "; |
... | ... | |
96 | 96 |
*/ |
97 | 97 |
@Override |
98 | 98 |
protected String getRecordQuery(BerlinModelImportConfigurator config) { |
99 |
String strQuery =
|
|
99 |
String strQuery = |
|
100 | 100 |
" SELECT df.*, pt.RIdentifier as taxonId, f.FactId, f.restrictedFlag, ecoFact.ecoFactId as ecoFactId " + |
101 | 101 |
" FROM DNAFact df INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " + |
102 |
" LEFT OUTER JOIN PTaxon pt ON f.PTNameFk = pt.PTNameFk AND f.PTRefFk = pt.PTRefFk " +
|
|
102 |
" LEFT OUTER JOIN PTaxon pt ON f.PTNameFk = pt.PTNameFk AND f.PTRefFk = pt.PTRefFk " + |
|
103 | 103 |
" LEFT OUTER JOIN EcoFact ecoFact ON ecoFact.CultureStrain = df.CultureStrainNo " + |
104 |
" WHERE f.FactCategoryFk = 203 AND (df.DNAFactId IN (" + ID_LIST_TOKEN + ") )"
|
|
104 |
" WHERE f.FactCategoryFk = 203 AND (df.DNAFactId IN (" + ID_LIST_TOKEN + ") )" |
|
105 | 105 |
+ " ORDER BY DNAFactID " |
106 | 106 |
; |
107 | 107 |
return strQuery; |
... | ... | |
110 | 110 |
@Override |
111 | 111 |
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState bmState) { |
112 | 112 |
boolean success = true; |
113 |
|
|
113 |
|
|
114 | 114 |
AlgaTerraImportState state = (AlgaTerraImportState)bmState; |
115 | 115 |
try { |
116 | 116 |
// makeVocabulariesAndFeatures(state); |
... | ... | |
120 | 120 |
} |
121 | 121 |
Set<SpecimenOrObservationBase> samplesToSave = new HashSet<SpecimenOrObservationBase>(); |
122 | 122 |
Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>(); |
123 |
|
|
124 |
Map<String, FieldUnit> ecoFactFieldObservationMap = (Map<String, FieldUnit>) partitioner.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE);
|
|
125 |
|
|
123 |
|
|
124 |
Map<String, FieldUnit> ecoFactFieldObservationMap = partitioner.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE); |
|
125 |
|
|
126 | 126 |
ResultSet rs = partitioner.getResultSet(); |
127 |
|
|
127 |
|
|
128 | 128 |
Map<String, Reference> referenceMap = new HashMap<String, Reference>(); |
129 |
|
|
129 |
|
|
130 | 130 |
|
131 | 131 |
try { |
132 |
|
|
132 |
|
|
133 | 133 |
int i = 0; |
134 | 134 |
|
135 | 135 |
//for each DNA fact record |
136 | 136 |
while (rs.next()){ |
137 |
|
|
137 |
|
|
138 | 138 |
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));} |
139 |
|
|
139 |
|
|
140 | 140 |
int dnaFactId = rs.getInt("DNAFactId"); |
141 | 141 |
String keywordsStr = rs.getString("Keywords"); |
142 | 142 |
String locusStr = rs.getString("Locus"); |
143 | 143 |
String definitionStr = rs.getString("Definition"); |
144 |
|
|
145 |
|
|
144 |
|
|
145 |
|
|
146 | 146 |
try { |
147 |
|
|
147 |
|
|
148 | 148 |
//source ref |
149 |
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
|
150 |
|
|
149 |
Reference sourceRef = state.getTransactionalSourceReference(); |
|
150 |
|
|
151 | 151 |
//import date |
152 | 152 |
DateTime importDateTime = makeImportDateTime(rs); |
153 |
|
|
153 |
|
|
154 | 154 |
//DNA Sample |
155 | 155 |
DnaSample dnaSample = DnaSample.NewInstance(); |
156 | 156 |
dnaSample.setCreated(importDateTime); |
157 |
|
|
157 |
|
|
158 | 158 |
//ecoFactFk |
159 | 159 |
makeDerivationFromEcoFact(state, rs, dnaSample, samplesToSave, dnaFactId); |
160 |
|
|
160 |
|
|
161 | 161 |
//sequence |
162 | 162 |
Sequence sequence = makeSequence(rs, dnaSample, dnaFactId, importDateTime); |
163 |
|
|
163 |
|
|
164 | 164 |
//locus |
165 | 165 |
//FIXME Deduplicate DnaMarker |
166 | 166 |
DefinedTerm locus = DefinedTerm.NewDnaMarkerInstance(definitionStr, keywordsStr, null); |
167 | 167 |
locus.setCreated(importDateTime); |
168 | 168 |
this.getTermService().save(locus); |
169 |
|
|
169 |
|
|
170 | 170 |
sequence.setDnaMarker(locus); |
171 |
|
|
171 |
|
|
172 | 172 |
//GenBank Accession |
173 | 173 |
makeGenBankAccession(rs, sequence, importDateTime, dnaFactId); |
174 |
|
|
174 |
|
|
175 | 175 |
//Comment |
176 | 176 |
String commentStr = rs.getString("Comment"); |
177 | 177 |
if (isNotBlank(commentStr)){ |
... | ... | |
179 | 179 |
annotation.setCreated(importDateTime); |
180 | 180 |
sequence.addAnnotation(annotation); |
181 | 181 |
} |
182 |
|
|
182 |
|
|
183 | 183 |
//Indiv.Assoc. |
184 | 184 |
makeIndividualsAssociation(partitioner, rs, state, taxaToSave, dnaSample); |
185 |
|
|
185 |
|
|
186 | 186 |
//TODO titleCache |
187 | 187 |
//prelim implementation: |
188 | 188 |
String cultStrain = rs.getString("CultureStrainNo"); |
... | ... | |
192 | 192 |
//TODO preliminary implementation |
193 | 193 |
String referenceStr = rs.getString("FactReference"); |
194 | 194 |
if (isNotBlank(referenceStr)){ |
195 |
Reference<?> ref = referenceMap.get(referenceStr);
|
|
195 |
Reference ref = referenceMap.get(referenceStr); |
|
196 | 196 |
if (ref == null){ |
197 | 197 |
ref = ReferenceFactory.newGeneric(); |
198 | 198 |
ref.setTitleCache(referenceStr, true); |
... | ... | |
200 | 200 |
} |
201 | 201 |
sequence.addCitation(ref); |
202 | 202 |
} |
203 |
|
|
203 |
|
|
204 | 204 |
//save |
205 |
samplesToSave.add(dnaSample);
|
|
206 |
|
|
205 |
samplesToSave.add(dnaSample); |
|
206 |
|
|
207 | 207 |
|
208 | 208 |
} catch (Exception e) { |
209 | 209 |
logger.warn("Exception in ecoFact: ecoFactId " + dnaFactId + ". " + e.getMessage()); |
210 | 210 |
e.printStackTrace(); |
211 |
}
|
|
212 |
|
|
211 |
} |
|
212 |
|
|
213 | 213 |
} |
214 |
|
|
214 |
|
|
215 | 215 |
logger.warn("DNASample or EcoFacts to save: " + samplesToSave.size()); |
216 |
getOccurrenceService().saveOrUpdate(samplesToSave);
|
|
216 |
getOccurrenceService().saveOrUpdate(samplesToSave); |
|
217 | 217 |
logger.warn("Taxa to save: " + samplesToSave.size()); |
218 | 218 |
getTaxonService().saveOrUpdate(taxaToSave); |
219 |
|
|
219 |
|
|
220 | 220 |
return success; |
221 | 221 |
} catch (SQLException e) { |
222 | 222 |
logger.error("SQLException:" + e); |
... | ... | |
228 | 228 |
private void makeDerivationFromEcoFact(AlgaTerraImportState state, ResultSet rs, DnaSample dnaSample, Set<SpecimenOrObservationBase> samplesToSave, Integer dnaFactId) throws SQLException { |
229 | 229 |
Integer ecoFactFk = nullSafeInt(rs, "ecoFactId"); |
230 | 230 |
if (ecoFactFk != null){ |
231 |
|
|
231 |
|
|
232 | 232 |
DerivedUnit ecoFact = (DerivedUnit)state.getRelatedObject(ECO_FACT_DERIVED_UNIT_NAMESPACE, ecoFactFk.toString()); |
233 | 233 |
if (ecoFact == null){ |
234 | 234 |
logger.warn("EcoFact is null for ecoFactFk: " + ecoFactFk + ", DnaFactId: " + dnaFactId); |
... | ... | |
236 | 236 |
DerivationEvent.NewSimpleInstance(ecoFact, dnaSample, DerivationEventType.DNA_EXTRACTION()); |
237 | 237 |
samplesToSave.add(ecoFact); |
238 | 238 |
} |
239 |
}
|
|
239 |
} |
|
240 | 240 |
} |
241 | 241 |
|
242 | 242 |
|
243 | 243 |
|
244 | 244 |
private void makeIndividualsAssociation(ResultSetPartitioner partitioner, ResultSet rs, AlgaTerraImportState state, Set<TaxonBase> taxaToSave, DnaSample dnaSample) throws SQLException{ |
245 |
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
|
246 |
Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
|
|
245 |
Reference sourceRef = state.getTransactionalSourceReference(); |
|
246 |
Map<String, TaxonBase> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE); |
|
247 | 247 |
Integer taxonId = rs.getInt("taxonId"); |
248 | 248 |
Integer factId = rs.getInt("factId"); |
249 | 249 |
Taxon taxon = getTaxon(state, taxonId, taxonMap, factId); |
... | ... | |
252 | 252 |
desc.addElement(assoc); |
253 | 253 |
taxaToSave.add(taxon); |
254 | 254 |
} |
255 |
|
|
255 |
|
|
256 | 256 |
|
257 | 257 |
/** |
258 | 258 |
* @param rs |
... | ... | |
272 | 272 |
// append; |
273 | 273 |
DateTimeParser p = new DateTimeParser(dayFormatter); |
274 | 274 |
importDateTime = p.parse(importDateTimeStr, Locale.GERMANY); |
275 |
|
|
275 |
|
|
276 | 276 |
} |
277 | 277 |
return importDateTime; |
278 | 278 |
} |
... | ... | |
292 | 292 |
logger.warn("SeqLen (" + seqLen+ ") and OriginalLen ("+sequenceStr.length()+") differ for dnaFact: " + dnaFactId); |
293 | 293 |
} |
294 | 294 |
} |
295 |
|
|
295 |
|
|
296 | 296 |
Sequence sequence = Sequence.NewInstance(sequenceStr, seqLen); |
297 | 297 |
sequence.setCreated(importDateTime); |
298 | 298 |
dnaSample.addSequence(sequence); |
... | ... | |
302 | 302 |
|
303 | 303 |
|
304 | 304 |
/** |
305 |
* @param sequence2
|
|
306 |
* @param rs
|
|
305 |
* @param sequence2 |
|
306 |
* @param rs |
|
307 | 307 |
* @param accessionStr |
308 | 308 |
* @param notesStr |
309 | 309 |
* @param sequence |
310 |
* @param importDateTime
|
|
310 |
* @param importDateTime |
|
311 | 311 |
* @return |
312 |
* @throws SQLException
|
|
312 |
* @throws SQLException |
|
313 | 313 |
*/ |
314 | 314 |
private void makeGenBankAccession(ResultSet rs, Sequence sequence, DateTime importDateTime, Integer dnaFactId) throws SQLException { |
315 | 315 |
String accessionStr = rs.getString("Accession"); |
316 | 316 |
String notesStr = rs.getString("Notes"); |
317 | 317 |
String versionStr = rs.getString("Version"); |
318 |
|
|
318 |
|
|
319 | 319 |
URI genBankUri = null; |
320 | 320 |
if (StringUtils.isNotBlank(notesStr)){ |
321 | 321 |
if (notesStr.startsWith("http")){ |
... | ... | |
324 | 324 |
logger.warn("Notes do not start with URI: " + notesStr); |
325 | 325 |
} |
326 | 326 |
} |
327 |
|
|
327 |
|
|
328 | 328 |
if (isNotBlank(accessionStr) || genBankUri != null){ |
329 | 329 |
if (accessionStr != null && accessionStr.trim().equals("")){ |
330 | 330 |
accessionStr = null; |
... | ... | |
334 | 334 |
} |
335 | 335 |
} |
336 | 336 |
} |
337 |
|
|
337 |
|
|
338 | 338 |
private boolean isGenBankAccessionNumber(String accessionStr, String versionStr, URI genBankUri, Integer dnaFactId) { |
339 | 339 |
boolean isGenBankAccessionNumber = accessionStr.matches("[A-Z]{2}\\d{6}"); |
340 | 340 |
boolean versionHasGenBankPart = versionStr.matches(".*GI:.*"); |
... | ... | |
342 | 342 |
return true; |
343 | 343 |
}else { |
344 | 344 |
if (genBankUri != null){ |
345 |
logger.warn("GenBank Uri exists but accession or version have been identified to use GenBank syntax. DNAFactID: " + dnaFactId);
|
|
345 |
logger.warn("GenBank Uri exists but accession or version have been identified to use GenBank syntax. DNAFactID: " + dnaFactId); |
|
346 | 346 |
} |
347 | 347 |
if(isGenBankAccessionNumber || versionHasGenBankPart){ |
348 |
logger.warn("Either accession ("+ accessionStr +") or version ("+versionStr+") use GenBank syntax but the other does not. DNAFactID: " + dnaFactId);
|
|
348 |
logger.warn("Either accession ("+ accessionStr +") or version ("+versionStr+") use GenBank syntax but the other does not. DNAFactID: " + dnaFactId); |
|
349 | 349 |
} |
350 | 350 |
return false; |
351 | 351 |
} |
... | ... | |
353 | 353 |
|
354 | 354 |
|
355 | 355 |
|
356 |
protected String getDerivedUnitNameSpace(){ |
|
356 |
@Override |
|
357 |
protected String getDerivedUnitNameSpace(){ |
|
357 | 358 |
return ECO_FACT_DERIVED_UNIT_NAMESPACE; |
358 | 359 |
} |
359 |
|
|
360 |
protected String getFieldObservationNameSpace(){ |
|
360 |
|
|
361 |
@Override |
|
362 |
protected String getFieldObservationNameSpace(){ |
|
361 | 363 |
return ECO_FACT_FIELD_OBSERVATION_NAMESPACE; |
362 | 364 |
} |
363 | 365 |
|
... | ... | |
367 | 369 |
Class<?> cdmClass; |
368 | 370 |
Set<String> idSet; |
369 | 371 |
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>(); |
370 |
|
|
372 |
|
|
371 | 373 |
try{ |
372 | 374 |
Set<String> taxonIdSet = new HashSet<String>(); |
373 |
|
|
375 |
|
|
374 | 376 |
Set<String> ecoFactFkSet = new HashSet<String>(); |
375 |
|
|
377 |
|
|
376 | 378 |
while (rs.next()){ |
377 | 379 |
handleForeignKey(rs, taxonIdSet, "taxonId"); |
378 | 380 |
handleForeignKey(rs, ecoFactFkSet, "ecoFactId"); |
379 | 381 |
} |
380 |
|
|
382 |
|
|
381 | 383 |
//taxon map |
382 | 384 |
nameSpace = BerlinModelTaxonImport.NAMESPACE; |
383 | 385 |
cdmClass = TaxonBase.class; |
384 | 386 |
idSet = taxonIdSet; |
385 | 387 |
Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
386 | 388 |
result.put(nameSpace, objectMap); |
387 |
|
|
389 |
|
|
388 | 390 |
|
389 | 391 |
//eco fact derived unit map |
390 | 392 |
nameSpace = AlgaTerraFactEcologyImport.ECO_FACT_DERIVED_UNIT_NAMESPACE; |
... | ... | |
392 | 394 |
idSet = ecoFactFkSet; |
393 | 395 |
Map<String, DerivedUnit> derivedUnitMap = (Map<String, DerivedUnit>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace); |
394 | 396 |
result.put(nameSpace, derivedUnitMap); |
395 |
|
|
397 |
|
|
396 | 398 |
} catch (SQLException e) { |
397 | 399 |
throw new RuntimeException(e); |
398 | 400 |
} |
... | ... | |
409 | 411 |
protected boolean isIgnore(BerlinModelImportState state){ |
410 | 412 |
return ! ((AlgaTerraImportState)state).getAlgaTerraConfigurator().isDoDna(); |
411 | 413 |
} |
412 |
|
|
414 |
|
|
413 | 415 |
} |
Also available in: Unified diff
Remove generics from Reference in cdmlib-app #5830