2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.model
.molecular
;
13 import java
.util
.HashSet
;
16 import javax
.persistence
.Entity
;
17 import javax
.persistence
.FetchType
;
18 import javax
.persistence
.ManyToMany
;
19 import javax
.persistence
.ManyToOne
;
20 import javax
.persistence
.Transient
;
21 import javax
.validation
.constraints
.Size
;
22 import javax
.xml
.bind
.annotation
.XmlAccessType
;
23 import javax
.xml
.bind
.annotation
.XmlAccessorType
;
24 import javax
.xml
.bind
.annotation
.XmlAttribute
;
25 import javax
.xml
.bind
.annotation
.XmlElement
;
26 import javax
.xml
.bind
.annotation
.XmlElementWrapper
;
27 import javax
.xml
.bind
.annotation
.XmlIDREF
;
28 import javax
.xml
.bind
.annotation
.XmlRootElement
;
29 import javax
.xml
.bind
.annotation
.XmlSchemaType
;
30 import javax
.xml
.bind
.annotation
.XmlType
;
32 import org
.apache
.log4j
.Logger
;
33 import org
.codehaus
.plexus
.util
.StringUtils
;
34 import org
.hibernate
.annotations
.Cascade
;
35 import org
.hibernate
.annotations
.CascadeType
;
36 import org
.hibernate
.envers
.Audited
;
37 import org
.hibernate
.search
.annotations
.IndexedEmbedded
;
38 import org
.springframework
.beans
.factory
.annotation
.Configurable
;
40 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
41 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
42 import eu
.etaxonomy
.cdm
.model
.common
.TermType
;
43 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
44 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
47 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
48 * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample},
51 * <BR>This class holds information about both the combining process of
52 * {@link SingleRead single sequences} to one consensus sequence
53 * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
54 * as well as sequence related information.
55 * The latter includes the {@link #getConsensusSequence() sequence string} itself,
56 * important genetic information about the DNA that has been sequenced
57 * ({@link #getDnaMarker() marker} , {@link #getHaplotype() haplotype}) as well as
58 * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
59 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
60 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
63 * @created 08-Nov-2007 13:06:51
65 * @updated 11-Jul-2013
67 @XmlAccessorType(XmlAccessType
.FIELD
)
68 @XmlType(name
= "Sequence", propOrder
= {
72 "barcodeSequencePart",
74 "geneticAccessionNumber",
81 @XmlRootElement(name
= "Sequencing")
85 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
86 public class Sequence
extends AnnotatableEntity
implements Cloneable
{
87 private static final long serialVersionUID
= 8298983152731241775L;
88 private static final Logger logger
= Logger
.getLogger(Sequence
.class);
90 //TODO move to cdmlib-ext?
91 private static final String GENBANK_BASE_URI
= "http://www.ncbi.nlm.nih.gov/nuccore/%s";
92 private static final String EMBL_BASE_URI
= "http://www.ebi.ac.uk/ena/data/view/%s";
93 private static final String DDBJ_BASE_URI
= "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
94 private static final String BOLD_BASE_URI
= "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
96 @XmlElement( name
= "DnaSample")
98 @XmlSchemaType(name
= "IDREF")
99 @ManyToOne(fetch
= FetchType
.LAZY
)
101 private DnaSample dnaSample
;
104 /** @see #getContigFile() */
105 @XmlElement(name
= "ContigFile")
107 @XmlSchemaType(name
= "IDREF")
108 @ManyToOne(fetch
= FetchType
.LAZY
)
109 private Media contigFile
;
111 /** @see #getConsensusSequence() */
112 @XmlElement(name
= "ConsensusSequence")
113 private SequenceString consensusSequence
= SequenceString
.NewInstance();
115 @XmlAttribute(name
= "isBarcode")
116 private Boolean isBarcode
= null;
118 /** @see #getBarcodeSequence()*/
119 @XmlElement(name
= "BarcodeSequencePart")
120 private SequenceString barcodeSequencePart
= SequenceString
.NewInstance();
122 /** @see #getGeneticAccessionNumber()*/
123 @XmlElement(name
= "GeneticAccessionNumber")
125 private String geneticAccessionNumber
;
127 /** @see #getBoldProcessId() */
128 @XmlElement(name
= "BoldProcessId")
130 private String boldProcessId
;
132 @XmlElementWrapper(name
= "SingleReads")
133 @XmlElement(name
= "SingleRead")
135 @XmlSchemaType(name
= "IDREF")
136 @ManyToMany(fetch
= FetchType
.LAZY
)
137 @Cascade({CascadeType
.SAVE_UPDATE
})
138 private Set
<SingleRead
> singleReads
= new HashSet
<SingleRead
>();
140 /** @see #getDnaMarker() */
141 @XmlElement(name
= "DnaMarker")
143 @XmlSchemaType(name
= "IDREF")
144 @ManyToOne(fetch
= FetchType
.LAZY
)
145 //no cascade as it is a defined term
146 private DefinedTerm dnaMarker
;
149 /** @see #getHaplotype() */
150 @XmlElement(name
= "Haplotype")
152 private String haplotype
;
154 /** @see #getCitations() */
155 @XmlElementWrapper(name
= "Citations")
156 @XmlElement(name
= "Citation")
158 @XmlSchemaType(name
= "IDREF")
159 @ManyToMany(fetch
= FetchType
.LAZY
)
160 @Cascade({CascadeType
.SAVE_UPDATE
})
161 private Set
<Reference
> citations
= new HashSet
<Reference
>();
163 // //should be calculated in case sequence is set
164 // @XmlElement (name = "DateSequenced", type= String.class)
165 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
166 // @Type(type="dateTimeUserType")
167 // @Basic(fetch = FetchType.LAZY)
168 // private DateTime dateSequenced;
171 //*********************** FACTORY ****************************************************/
173 public static Sequence
NewInstance(String consensusSequence
){
174 Sequence result
= new Sequence();
175 result
.setSequenceString(consensusSequence
);
180 public static Sequence
NewInstance(String consensusSequence
, Integer length
){
181 Sequence result
= NewInstance(consensusSequence
);
182 result
.getConsensusSequence().setLength(length
);
185 //*********************** CONSTRUCTOR ****************************************************/
/**
 * No-argument constructor. Within this class it is invoked by the
 * <code>NewInstance</code> factory methods.
 */
protected Sequence() {
    super();
}
189 //*********************** GETTER / SETTER ****************************************************/
193 * The {@link DnaSample dna sample} this sequencing belongs to.
195 public DnaSample
getDnaSample() {
199 //TODO bidirectionality??
201 * @see #getDnaSample()
203 private void setDnaSample(DnaSample dnaSample
) {
204 this.dnaSample
= dnaSample
;
208 * The resulting consensus sequence represented by this {@link Sequence sequence} .
209 * The consensus is usually computed from the {@link SingleRead single reads}.
210 * The result of which is stored in a file called {@link #getContigFile() contig file}
212 * @see #getContigFile()
213 * @see #getSingleReads()
215 public SequenceString
getConsensusSequence() {
216 return consensusSequence
;
221 * @see #getConsensusSequence()
223 public void setConsensusSequence(SequenceString sequenceString
) {
224 if (sequenceString
== null){
225 sequenceString
= SequenceString
.NewInstance();
227 this.consensusSequence
= sequenceString
;
231 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
232 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
233 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
234 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
235 * whether the sequence is a barcoding sequence or not.
237 * @see #getBarcodeSequencePart()
238 * @see #getSequenceString()
239 * @return the isBarcode flag value (tri-state)
242 public Boolean
getIsBarcode() {
247 * @see #getIsBarcode()
248 * @see #getBarcodeSequencePart()
250 public void setIsBarcode(Boolean isBarcode
) {
251 this.isBarcode
= isBarcode
;
255 * If the barcode sequence string does not include 100% of the (consensus) sequence
256 * the part used as barcode is provided here. However, the barcode part
257 * should be kept empty if consensus sequence string and barcode sequence string are equal.
259 * @see #getIsBarcode()
261 public SequenceString
getBarcodeSequencePart() {
262 return barcodeSequencePart
;
266 * @see #getBarcodeSequencePart()
268 public void setBarcodeSequencePart(SequenceString barcodeSequencePart
) {
269 if (barcodeSequencePart
== null){
270 barcodeSequencePart
= SequenceString
.NewInstance();
272 this.barcodeSequencePart
= barcodeSequencePart
;
276 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
277 * The marker should usually be similar to the one used in the according {@link Amplification
278 * amplification process}. However, it may slightly differ, or, if multiple amplifications were
279 * used to build this consensus sequence it may be the super set of the markers used in amplification.
283 public DefinedTerm
getDnaMarker(){
284 return this.dnaMarker
;
288 * @see #getDnaMarker()
291 public void setDnaMarker(DefinedTerm dnaMarker
){
292 this.dnaMarker
= dnaMarker
;
296 * The accession number used in GenBank, EMBL and DDBJ.
299 public String
getGeneticAccessionNumber() {
300 return geneticAccessionNumber
;
304 * Sets the genetic accession number.
305 * @see #getGeneticAccessionNumber()
307 public void setGeneticAccessionNumber(String geneticAccessionNumber
) {
308 this.geneticAccessionNumber
= geneticAccessionNumber
;
313 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
315 public String
getBoldProcessId() {
316 return boldProcessId
;
319 public void setBoldProcessId(String boldProcessId
) {
320 this.boldProcessId
= boldProcessId
;
324 * Returns the name of the haplotype.
325 * A haplotype (haploid genotype) is a variant of nucleotide sequences on the same chromosome.
326 * A certain haplotype may be specific for an individual, a population or a species.
329 public String
getHaplotype() {
334 * @see #getHaplotype()
336 public void setHaplotype(String haplotype
) {
337 this.haplotype
= haplotype
;
341 * The contigFile containing all data and data processing for this sequencing.
343 * @see #getConsensusSequence()
344 * @see #getSingleReads()
346 public Media
getContigFile() {
351 * @see #getContigFile()
353 public void setContigFile(Media contigFile
) {
354 this.contigFile
= contigFile
;
359 * Citations are the set of references in which this sequence was published.
360 * Unlike taxonomic names the first publication of a sequence
361 * is not so important (maybe because it is required by publishers
362 * that they are all registered at Genbank) therefore we do not have something like an
363 * "original reference" attribute.<BR>
364 * Links to these references are to be stored within the reference itself.
365 * @return the set of references in which this sequence was published.
367 public Set
<Reference
> getCitations() {
371 * @see #getCitations()
373 protected void setCitations(Set
<Reference
> citations
) {
374 this.citations
= citations
;
377 * @see #getCitations()
379 public void addCitation(Reference citation
) {
380 this.citations
.add(citation
);
383 * @see #getCitations()
385 public void removeCitation(Reference citation
) {
386 this.citations
.remove(citation
);
390 * The {@link SingleRead single reads} that were used to build this consensus sequence.
392 * @see #getConsensusSequence()
393 * @see #getContigFile()
395 public Set
<SingleRead
> getSingleReads() {
399 * @see #getSingleReads()
401 public void addSingleRead(SingleRead singleRead
) {
402 this.singleReads
.add(singleRead
);
405 * @see #getSingleReads()
407 public void removeSingleRead(SingleRead singleRead
) {
408 this.singleReads
.remove(singleRead
);
411 * @see #getSingleReads()
413 //TODO private as long it is unclear how bidirectionality is handled
414 private void setSingleReads(Set
<SingleRead
> singleReads
) {
415 this.singleReads
= singleReads
;
419 //*************************** Transient GETTER /SETTER *****************************/
422 * Delegate method to get the text representation of the consensus sequence
423 * @see #setSequenceString(String)
426 public String
getSequenceString() {
427 return consensusSequence
.getString();
431 * Delegate method to set the text representation of the {@link #getConsensusSequence()
432 * consensus sequence}.
435 public void setSequenceString(String sequence
) {
436 consensusSequence
.setString(sequence
);
440 * Convenience method which computes the set of all related pherograms
441 * @return the set of pherograms.
444 public Set
<Media
> getPherograms(){
445 Set
<Media
> result
= new HashSet
<Media
>();
446 for (SingleRead singleSeq
: singleReads
){
447 if (singleSeq
.getPherogram() != null){
448 result
.add(singleSeq
.getPherogram());
455 //***** Registrations ************/
457 * Returns the computed genBank uri.
461 public URI
getGenBankUri() {
462 return createExternalUri(GENBANK_BASE_URI
);
466 * Returns the computed EMBL uri.
470 public URI
getEmblUri() {
471 return createExternalUri(EMBL_BASE_URI
);
475 * Returns the computed DDBJ uri.
479 public URI
getDdbjUri() {
480 return createExternalUri(DDBJ_BASE_URI
);
484 * Returns the URI for the BOLD entry.
485 * @see #getBoldProcessId()
488 public URI
getBoldUri() {
489 return createExternalUri(BOLD_BASE_URI
);
493 private URI
createExternalUri(String baseUri
){
494 if (StringUtils
.isNotBlank(geneticAccessionNumber
)){
495 return URI
.create(String
.format(baseUri
, geneticAccessionNumber
.trim()));
502 //*********************** CLONE ********************************************************/
504 * Clones <i>this</i> sequence. This is a shortcut that enables to create
505 * a new instance that differs only slightly from <i>this</i> sequencing by
506 * modifying only some of the attributes.<BR><BR>
509 * @see eu.etaxonomy.cdm.model.common.AnnotatableEntity#clone()
510 * @see java.lang.Object#clone()
513 public Object
clone() {
515 Sequence result
= (Sequence
)super.clone();
518 result
.consensusSequence
= (SequenceString
)this.consensusSequence
.clone();
519 result
.barcodeSequencePart
= (SequenceString
)this.barcodeSequencePart
.clone();
523 result
.singleReads
= new HashSet
<SingleRead
>();
524 for (SingleRead seq
: this.singleReads
){
525 result
.singleReads
.add((SingleRead
) seq
);
528 //citations //TODO do we really want to copy these ??
529 result
.citations
= new HashSet
<Reference
>();
530 for (Reference ref
: this.citations
){
531 result
.citations
.add((Reference
) ref
);
537 }catch (CloneNotSupportedException e
) {
538 logger
.warn("Object does not implement cloneable");