2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.algaterra
;
13 import java
.sql
.ResultSet
;
14 import java
.sql
.SQLException
;
15 import java
.text
.ParseException
;
16 import java
.util
.HashMap
;
17 import java
.util
.HashSet
;
18 import java
.util
.Locale
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.joda
.time
.DateTime
;
25 import org
.joda
.time
.format
.DateTimeFormat
;
26 import org
.joda
.time
.format
.DateTimeFormatter
;
27 import org
.springframework
.format
.datetime
.joda
.DateTimeParser
;
28 import org
.springframework
.stereotype
.Component
;
30 import eu
.etaxonomy
.cdm
.io
.algaterra
.validation
.AlgaTerraDnaImportValidator
;
31 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.BerlinModelImportConfigurator
;
32 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.BerlinModelImportState
;
33 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.BerlinModelTaxonImport
;
34 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
35 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
37 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
38 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
39 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
40 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
41 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
42 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
43 import eu
.etaxonomy
.cdm
.model
.molecular
.DnaSample
;
44 import eu
.etaxonomy
.cdm
.model
.molecular
.Sequence
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivationEvent
;
46 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivationEventType
;
47 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
48 import eu
.etaxonomy
.cdm
.model
.occurrence
.FieldUnit
;
49 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
50 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
51 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
52 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
53 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
61 public class AlgaTerraDnaImport
extends AlgaTerraSpecimenImportBase
{
62 private static final Logger logger
= Logger
.getLogger(AlgaTerraDnaImport
.class);
65 private static int modCount
= 5000;
66 private static final String pluralString
= "dna facts";
67 private static final String dbTableName
= "DNAFact"; //??
/**
 * Creates the DNA import, passing {@code dbTableName} and {@code pluralString}
 * on to the base class constructor.
 */
public AlgaTerraDnaImport() {
    super(dbTableName, pluralString);
}
77 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
80 protected String
getIdQuery(BerlinModelImportState bmState
) {
81 AlgaTerraImportState state
= (AlgaTerraImportState
)bmState
;
82 String result
= " SELECT df.DNAFactId " +
84 " INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " +
85 " WHERE f.FactCategoryFk = 203 ";
86 if (state
.getAlgaTerraConfigurator().isRemoveRestricted()){
87 result
= result
+ " AND df.ProtectedFlag = 0 ";
88 logger
.warn("DNA with protectedFlag = 0 is currently not imported");
90 result
+= " ORDER BY df.DNAFactID ";
95 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
98 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
100 " SELECT df.*, pt.RIdentifier as taxonId, f.FactId, f.restrictedFlag, ecoFact.ecoFactId as ecoFactId " +
101 " FROM DNAFact df INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " +
102 " LEFT OUTER JOIN PTaxon pt ON f.PTNameFk = pt.PTNameFk AND f.PTRefFk = pt.PTRefFk " +
103 " LEFT OUTER JOIN EcoFact ecoFact ON ecoFact.CultureStrain = df.CultureStrainNo " +
104 " WHERE f.FactCategoryFk = 203 AND (df.DNAFactId IN (" + ID_LIST_TOKEN
+ ") )"
105 + " ORDER BY DNAFactID "
111 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState bmState
) {
112 boolean success
= true;
114 AlgaTerraImportState state
= (AlgaTerraImportState
)bmState
;
116 // makeVocabulariesAndFeatures(state);
117 } catch (Exception e1
) {
118 logger
.warn("Exception occurred when trying to create Ecofact vocabularies: " + e1
.getMessage());
119 e1
.printStackTrace();
121 Set
<SpecimenOrObservationBase
> samplesToSave
= new HashSet
<SpecimenOrObservationBase
>();
122 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
124 Map
<String
, FieldUnit
> ecoFactFieldObservationMap
= (Map
<String
, FieldUnit
>) partitioner
.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE
);
126 ResultSet rs
= partitioner
.getResultSet();
128 Map
<String
, Reference
> referenceMap
= new HashMap
<String
, Reference
>();
138 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
140 int dnaFactId
= rs
.getInt("DNAFactId");
141 String keywordsStr
= rs
.getString("Keywords");
142 String locusStr
= rs
.getString("Locus");
143 String definitionStr
= rs
.getString("Definition");
149 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
152 DateTime importDateTime
= makeImportDateTime(rs
);
155 DnaSample dnaSample
= DnaSample
.NewInstance();
156 dnaSample
.setCreated(importDateTime
);
159 makeDerivationFromEcoFact(state
, rs
, dnaSample
, samplesToSave
, dnaFactId
);
162 Sequence sequence
= makeSequence(rs
, dnaSample
, dnaFactId
, importDateTime
);
165 //FIXME Deduplicate DnaMarker
166 DefinedTerm locus
= DefinedTerm
.NewDnaMarkerInstance(definitionStr
, keywordsStr
, null);
167 locus
.setCreated(importDateTime
);
168 this.getTermService().save(locus
);
170 sequence
.setDnaMarker(locus
);
173 makeGenBankAccession(rs
, sequence
, importDateTime
, dnaFactId
);
176 String commentStr
= rs
.getString("Comment");
177 if (isNotBlank(commentStr
)){
178 Annotation annotation
= Annotation
.NewInstance(commentStr
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
179 annotation
.setCreated(importDateTime
);
180 sequence
.addAnnotation(annotation
);
184 makeIndividualsAssociation(partitioner
, rs
, state
, taxaToSave
, dnaSample
);
187 //prelim implementation:
188 String cultStrain
= rs
.getString("CultureStrainNo");
189 String title
= String
.format("DNA Sample for %s at %s", cultStrain
, keywordsStr
);
190 dnaSample
.setTitleCache(title
, true);
192 //TODO preliminary implementation
193 String referenceStr
= rs
.getString("FactReference");
194 if (isNotBlank(referenceStr
)){
195 Reference
<?
> ref
= referenceMap
.get(referenceStr
);
197 ref
= ReferenceFactory
.newGeneric();
198 ref
.setTitleCache(referenceStr
, true);
199 referenceMap
.put(referenceStr
, ref
);
201 sequence
.addCitation(ref
);
205 samplesToSave
.add(dnaSample
);
208 } catch (Exception e
) {
209 logger
.warn("Exception in ecoFact: ecoFactId " + dnaFactId
+ ". " + e
.getMessage());
215 logger
.warn("DNASample or EcoFacts to save: " + samplesToSave
.size());
216 getOccurrenceService().saveOrUpdate(samplesToSave
);
217 logger
.warn("Taxa to save: " + samplesToSave
.size());
218 getTaxonService().saveOrUpdate(taxaToSave
);
221 } catch (SQLException e
) {
222 logger
.error("SQLException:" + e
);
228 private void makeDerivationFromEcoFact(AlgaTerraImportState state
, ResultSet rs
, DnaSample dnaSample
, Set
<SpecimenOrObservationBase
> samplesToSave
, Integer dnaFactId
) throws SQLException
{
229 Integer ecoFactFk
= nullSafeInt(rs
, "ecoFactId");
230 if (ecoFactFk
!= null){
232 DerivedUnit ecoFact
= (DerivedUnit
)state
.getRelatedObject(ECO_FACT_DERIVED_UNIT_NAMESPACE
, ecoFactFk
.toString());
233 if (ecoFact
== null){
234 logger
.warn("EcoFact is null for ecoFactFk: " + ecoFactFk
+ ", DnaFactId: " + dnaFactId
);
236 DerivationEvent
.NewSimpleInstance(ecoFact
, dnaSample
, DerivationEventType
.DNA_EXTRACTION());
237 samplesToSave
.add(ecoFact
);
244 private void makeIndividualsAssociation(ResultSetPartitioner partitioner
, ResultSet rs
, AlgaTerraImportState state
, Set
<TaxonBase
> taxaToSave
, DnaSample dnaSample
) throws SQLException
{
245 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
246 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
247 Integer taxonId
= rs
.getInt("taxonId");
248 Integer factId
= rs
.getInt("factId");
249 Taxon taxon
= getTaxon(state
, taxonId
, taxonMap
, factId
);
250 TaxonDescription desc
= getTaxonDescription(state
, taxon
, sourceRef
);
251 IndividualsAssociation assoc
= IndividualsAssociation
.NewInstance(dnaSample
);
252 desc
.addElement(assoc
);
253 taxaToSave
.add(taxon
);
260 * @throws SQLException
261 * @throws ParseException
263 private DateTime
makeImportDateTime(ResultSet rs
) throws SQLException
,
265 DateTime importDateTime
= null;
266 String importDateTimeStr
= rs
.getString("ImportDateTime");
267 if (isNotBlank(importDateTimeStr
)){
268 importDateTimeStr
= importDateTimeStr
.substring(0,10);
269 DateTimeFormatter dayFormatter
= DateTimeFormat
.forPattern("dd.MM.yyyy");
271 // DateTimeFormatter formatter = new DateTimeFormatterBuilder().
273 DateTimeParser p
= new DateTimeParser(dayFormatter
);
274 importDateTime
= p
.parse(importDateTimeStr
, Locale
.GERMANY
);
277 return importDateTime
;
282 private Sequence
makeSequence(ResultSet rs
, DnaSample dnaSample
, int dnaFactId
, DateTime importDateTime
) throws SQLException
{
283 String sequenceStr
= rs
.getString("PlainSequence");
284 Integer seqLen
= nullSafeInt(rs
, "SeqLen");
286 if (sequenceStr
== null){
287 logger
.warn("PlainSequence is null. Id: " + dnaFactId
);
290 logger
.warn("SeqLen is null for dnaFact: " + dnaFactId
);
291 }else if (sequenceStr
.length() != seqLen
){
292 logger
.warn("SeqLen (" + seqLen
+ ") and OriginalLen ("+sequenceStr
.length()+") differ for dnaFact: " + dnaFactId
);
296 Sequence sequence
= Sequence
.NewInstance(sequenceStr
, seqLen
);
297 sequence
.setCreated(importDateTime
);
298 dnaSample
.addSequence(sequence
);
307 * @param accessionStr
310 * @param importDateTime
312 * @throws SQLException
314 private void makeGenBankAccession(ResultSet rs
, Sequence sequence
, DateTime importDateTime
, Integer dnaFactId
) throws SQLException
{
315 String accessionStr
= rs
.getString("Accession");
316 String notesStr
= rs
.getString("Notes");
317 String versionStr
= rs
.getString("Version");
319 URI genBankUri
= null;
320 if (StringUtils
.isNotBlank(notesStr
)){
321 if (notesStr
.startsWith("http")){
322 genBankUri
= URI
.create(notesStr
);
324 logger
.warn("Notes do not start with URI: " + notesStr
);
328 if (isNotBlank(accessionStr
) || genBankUri
!= null){
329 if (accessionStr
!= null && accessionStr
.trim().equals("")){
332 if (isGenBankAccessionNumber(accessionStr
, versionStr
, genBankUri
, dnaFactId
) || genBankUri
!= null){
333 sequence
.setGeneticAccessionNumber(accessionStr
);
338 private boolean isGenBankAccessionNumber(String accessionStr
, String versionStr
, URI genBankUri
, Integer dnaFactId
) {
339 boolean isGenBankAccessionNumber
= accessionStr
.matches("[A-Z]{2}\\d{6}");
340 boolean versionHasGenBankPart
= versionStr
.matches(".*GI:.*");
341 if (isGenBankAccessionNumber
&& versionHasGenBankPart
){
344 if (genBankUri
!= null){
345 logger
.warn("GenBank Uri exists but accession or version have been identified to use GenBank syntax. DNAFactID: " + dnaFactId
);
347 if(isGenBankAccessionNumber
|| versionHasGenBankPart
){
348 logger
.warn("Either accession ("+ accessionStr
+") or version ("+versionStr
+") use GenBank syntax but the other does not. DNAFactID: " + dnaFactId
);
356 protected String
getDerivedUnitNameSpace(){
357 return ECO_FACT_DERIVED_UNIT_NAMESPACE
;
360 protected String
getFieldObservationNameSpace(){
361 return ECO_FACT_FIELD_OBSERVATION_NAMESPACE
;
365 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
, BerlinModelImportState state
) {
369 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
372 Set
<String
> taxonIdSet
= new HashSet
<String
>();
374 Set
<String
> ecoFactFkSet
= new HashSet
<String
>();
377 handleForeignKey(rs
, taxonIdSet
, "taxonId");
378 handleForeignKey(rs
, ecoFactFkSet
, "ecoFactId");
382 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
383 cdmClass
= TaxonBase
.class;
385 Map
<String
, TaxonBase
> objectMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
386 result
.put(nameSpace
, objectMap
);
389 //eco fact derived unit map
390 nameSpace
= AlgaTerraFactEcologyImport
.ECO_FACT_DERIVED_UNIT_NAMESPACE
;
391 cdmClass
= DerivedUnit
.class;
392 idSet
= ecoFactFkSet
;
393 Map
<String
, DerivedUnit
> derivedUnitMap
= (Map
<String
, DerivedUnit
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
394 result
.put(nameSpace
, derivedUnitMap
);
396 } catch (SQLException e
) {
397 throw new RuntimeException(e
);
403 protected boolean doCheck(BerlinModelImportState state
){
404 IOValidator
<BerlinModelImportState
> validator
= new AlgaTerraDnaImportValidator();
405 return validator
.validate(state
);
409 protected boolean isIgnore(BerlinModelImportState state
){
410 return ! ((AlgaTerraImportState
)state
).getAlgaTerraConfigurator().isDoDna();