2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.model
.molecular
;
13 import java
.net
.URISyntaxException
;
14 import java
.util
.HashSet
;
17 import javax
.persistence
.Entity
;
18 import javax
.persistence
.FetchType
;
19 import javax
.persistence
.ManyToMany
;
20 import javax
.persistence
.ManyToOne
;
21 import javax
.persistence
.OneToMany
;
22 import javax
.persistence
.Transient
;
23 import javax
.validation
.constraints
.Size
;
24 import javax
.xml
.bind
.annotation
.XmlAccessType
;
25 import javax
.xml
.bind
.annotation
.XmlAccessorType
;
26 import javax
.xml
.bind
.annotation
.XmlAttribute
;
27 import javax
.xml
.bind
.annotation
.XmlElement
;
28 import javax
.xml
.bind
.annotation
.XmlElementWrapper
;
29 import javax
.xml
.bind
.annotation
.XmlIDREF
;
30 import javax
.xml
.bind
.annotation
.XmlRootElement
;
31 import javax
.xml
.bind
.annotation
.XmlSchemaType
;
32 import javax
.xml
.bind
.annotation
.XmlTransient
;
33 import javax
.xml
.bind
.annotation
.XmlType
;
35 import org
.apache
.log4j
.Logger
;
36 import org
.codehaus
.plexus
.util
.StringUtils
;
37 import org
.hibernate
.annotations
.Cascade
;
38 import org
.hibernate
.annotations
.CascadeType
;
39 import org
.hibernate
.envers
.Audited
;
40 import org
.hibernate
.search
.annotations
.IndexedEmbedded
;
41 import org
.springframework
.beans
.factory
.annotation
.Configurable
;
43 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
44 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
45 import eu
.etaxonomy
.cdm
.model
.common
.TermType
;
46 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
47 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
/**
 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
 * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample}.
 *
 * <BR>This class holds information about both the combining process of
 * {@link SingleRead single sequences} to one consensus sequence
 * ({@link #getSingleReads() singleReads}, {@link #getContigFile() contigFile})
 * as well as sequence related information.
 * The latter includes the {@link #getConsensusSequence() sequence string} itself,
 * important genetic information about the DNA that has been sequenced
 * ({@link #getDnaMarker() marker}, {@link #getHaplotype() haplotype}) as well as
 * registration information ({@link #getGeneticAccessionNumber() genetic accession number}),
 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
 *
 * @created 08-Nov-2007 13:06:51
 * @updated 11-Jul-2013
 */
70 @XmlAccessorType(XmlAccessType
.FIELD
)
71 @XmlType(name
= "Sequence", propOrder
= {
75 "barcodeSequencePart",
77 "geneticAccessionNumber",
81 "singleReadAlignments",
84 @XmlRootElement(name
= "Sequencing")
88 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
89 public class Sequence
extends AnnotatableEntity
implements Cloneable
{
90 private static final long serialVersionUID
= 8298983152731241775L;
91 private static final Logger logger
= Logger
.getLogger(Sequence
.class);
93 //TODO move to cdmlib-ext?
94 private static final String GENBANK_BASE_URI
= "http://www.ncbi.nlm.nih.gov/nuccore/%s";
95 private static final String EMBL_BASE_URI
= "http://www.ebi.ac.uk/ena/data/view/%s";
96 private static final String DDBJ_BASE_URI
= "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
97 private static final String BOLD_BASE_URI
= "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
99 @XmlElement( name
= "DnaSample")
101 @XmlSchemaType(name
= "IDREF")
102 @ManyToOne(fetch
= FetchType
.LAZY
)
104 private DnaSample dnaSample
;
107 /** @see #getContigFile() */
108 @XmlElement(name
= "ContigFile")
110 @XmlSchemaType(name
= "IDREF")
111 @ManyToOne(fetch
= FetchType
.LAZY
)
112 @Cascade({CascadeType
.SAVE_UPDATE
,CascadeType
.MERGE
})
113 private Media contigFile
;
115 /** @see #getConsensusSequence() */
116 @XmlElement(name
= "ConsensusSequence")
117 private SequenceString consensusSequence
= SequenceString
.NewInstance();
119 @XmlAttribute(name
= "isBarcode")
120 private Boolean isBarcode
= null;
122 /** @see #getBarcodeSequence()*/
123 @XmlElement(name
= "BarcodeSequencePart")
124 private SequenceString barcodeSequencePart
= SequenceString
.NewInstance();
126 /** @see #getGeneticAccessionNumber()*/
127 @XmlElement(name
= "GeneticAccessionNumber")
129 private String geneticAccessionNumber
;
131 /** @see #getBoldProcessId() */
132 @XmlElement(name
= "BoldProcessId")
134 private String boldProcessId
;
136 @XmlElementWrapper(name
= "SingleReadAlignments")
137 @XmlElement(name
= "SingleReadAlignment")
139 @XmlSchemaType(name
= "IDREF")
140 @OneToMany(mappedBy
="consensusAlignment", fetch
= FetchType
.LAZY
)
141 @Cascade({CascadeType
.SAVE_UPDATE
,CascadeType
.MERGE
})
142 private Set
<SingleReadAlignment
> singleReadAlignments
= new HashSet
<SingleReadAlignment
>();
144 /** @see #getDnaMarker() */
145 @XmlElement(name
= "DnaMarker")
147 @XmlSchemaType(name
= "IDREF")
148 @ManyToOne(fetch
= FetchType
.LAZY
)
149 //no cascade as it is a defined term
150 private DefinedTerm dnaMarker
;
153 /** @see #getHaplotype() */
154 @XmlElement(name
= "Haplotype")
156 private String haplotype
;
158 /** @see #getCitations() */
159 @XmlElementWrapper(name
= "Citations")
160 @XmlElement(name
= "Citation")
162 @XmlSchemaType(name
= "IDREF")
163 @ManyToMany(fetch
= FetchType
.LAZY
)
164 @Cascade({CascadeType
.SAVE_UPDATE
,CascadeType
.MERGE
})
165 private Set
<Reference
> citations
= new HashSet
<Reference
>();
167 // //should be calculated in case sequence is set
168 // @XmlElement (name = "DateSequenced", type= String.class)
169 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
170 // @Type(type="dateTimeUserType")
171 // @Basic(fetch = FetchType.LAZY)
172 // private DateTime dateSequenced;
175 //*********************** FACTORY ****************************************************/
177 public static Sequence
NewInstance(String consensusSequence
){
178 Sequence result
= new Sequence();
179 result
.setSequenceString(consensusSequence
);
184 public static Sequence
NewInstance(String consensusSequence
, Integer length
){
185 Sequence result
= NewInstance(consensusSequence
);
186 result
.getConsensusSequence().setLength(length
);
190 public static Sequence
NewInstance(DnaSample dnaSample
, String consensusSequence
, Integer length
){
191 Sequence result
= NewInstance(consensusSequence
);
192 result
.getConsensusSequence().setLength(length
);
193 dnaSample
.addSequence(result
);
//*********************** CONSTRUCTOR ****************************************************/

/**
 * Non-public default constructor, used by the <code>NewInstance</code> factory methods.
 */
protected Sequence() {
    super();
}
201 //*********************** GETTER / SETTER ****************************************************/
205 * The {@link DnaSample dna sample} this sequencing belongs too.
207 public DnaSample
getDnaSample() {
212 * To be called only from {@link DnaSample#addSequence(Sequence)}
213 * @see #getDnaSample()
215 //TODO implement full bidirectionality
216 protected void setDnaSample(DnaSample dnaSample
) {
217 this.dnaSample
= dnaSample
;
218 if (dnaSample
!= null && !dnaSample
.getSequences().contains(this)){
219 throw new RuntimeException("Don't use DNA setter");
224 * The resulting consensus sequence represened by this {@link Sequence sequence} .
225 * The consensus is usually computed from the {@link SingleRead single reads}.
226 * The result of which is stored in a file called {@link #getContigFile() contig file}
228 * #see {@link #getContigFile()}
229 * #see {@link #getSingleReads()}
231 public SequenceString
getConsensusSequence() {
232 return consensusSequence
;
237 * @see #getConsensusSequence()
239 public void setConsensusSequence(SequenceString sequenceString
) {
240 if (sequenceString
== null){
241 sequenceString
= SequenceString
.NewInstance();
243 this.consensusSequence
= sequenceString
;
247 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
248 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
249 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
250 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
251 * whether the sequence is a barcoding sequence or not.
253 * @see #getBarcodeSequencePart()
254 * @see #getSequenceString()
255 * @returns the isBarcode flag value (tri-state)
258 public Boolean
getIsBarcode() {
263 * @see #getIsBarcode()
264 * @see #getBarcodeSequencePart()
266 public void setIsBarcode(Boolean isBarcode
) {
267 this.isBarcode
= isBarcode
;
271 * If the barcode sequence string does not include 100% of the (consensus) sequence
272 * the part used as barcode is provided here. However, the barcode part
273 * should be kept empty if consensus sequence string and barcode sequence string are equal.
275 * @see #getIsBarcode()
277 public SequenceString
getBarcodeSequencePart() {
278 return barcodeSequencePart
;
282 * @see #getBarcodeSequencePart()
284 public void setBarcodeSequencePart(SequenceString barcodeSequencePart
) {
285 if (barcodeSequencePart
== null){
286 barcodeSequencePart
= SequenceString
.NewInstance();
288 this.barcodeSequencePart
= barcodeSequencePart
;
292 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
293 * The marker should usually be similar to the one used in the according {@link Amplification
294 * amplification process}. However, it may slightly differ, or, if multiple amplifications where
295 * used to build this consensus sequence it may be the super set of the markers used in amplification.
299 public DefinedTerm
getDnaMarker(){
300 return this.dnaMarker
;
304 * @see #getDnaMarker()
307 public void setDnaMarker(DefinedTerm dnaMarker
){
308 this.dnaMarker
= dnaMarker
;
312 * The accession number used in GenBank, EMBL and DDBJ.
315 public String
getGeneticAccessionNumber() {
316 return geneticAccessionNumber
;
320 * Sets the genetic accession number.
321 * @see #getGeneticAccessionNumber()
323 public void setGeneticAccessionNumber(String geneticAccessionNumber
) {
324 this.geneticAccessionNumber
= geneticAccessionNumber
;
329 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
331 public String
getBoldProcessId() {
332 return boldProcessId
;
335 public void setBoldProcessId(String boldProcessId
) {
336 this.boldProcessId
= boldProcessId
;
340 * Returns the name of the haplotype.
341 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
342 * A certain haplotype may be specific for an individual, a population or a species.
345 public String
getHaplotype() {
350 * @see #getHaplotype()
352 public void setHaplotype(String haplotype
) {
353 this.haplotype
= haplotype
;
357 * The contigFile containing all data and data processing for this sequencing.
359 * @see #getConsensusSequence()
360 * @see #getSingleReads()
362 public Media
getContigFile() {
367 * @see #getContigFile()
369 public void setContigFile(Media contigFile
) {
370 this.contigFile
= contigFile
;
375 * Citations are the set of references in which this sequence was published.
376 * Unlike taxonomic names the first publication of a sequence
377 * is not so important (maybe because it is required by publishers
378 * that they are all registered at Genbank) therefore we do not have something like an
379 * "original reference" attribute.<BR>
380 * Links to these references are to be stored within the reference itself.
381 * @return the set of references in which this sequence was published.
383 public Set
<Reference
> getCitations() {
387 * @see #getCitations()
389 protected void setCitations(Set
<Reference
> citations
) {
390 this.citations
= citations
;
393 * @see #getCitations()
395 public void addCitation(Reference citation
) {
396 this.citations
.add(citation
);
399 * @see #getCitations()
401 public void removeCitation(Reference citation
) {
402 this.citations
.remove(citation
);
406 * The {@link SingleRead single reads} that were used to build this consensus sequence.
408 * @see #getConsensusSequence()
409 * @see #getContigFile()
411 public Set
<SingleReadAlignment
> getSingleReadAlignments() {
412 return singleReadAlignments
;
415 * @see #getSingleReads()
417 public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment
) {
418 this.singleReadAlignments
.add(singleReadAlignment
);
419 if (! this.equals(singleReadAlignment
.getConsensusSequence())){
420 singleReadAlignment
.setConsensusAlignment(this);
424 * @see #getSingleReads()
426 public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment
) {
427 this.singleReadAlignments
.remove(singleReadAlignment
);
428 if (this.equals(singleReadAlignment
.getConsensusSequence())){
429 singleReadAlignment
.setConsensusAlignment(null);
433 // * @see #getSingleReads()
435 // //TODO private as long it is unclear how bidirectionality is handled
436 // @SuppressWarnings("unused")
437 // private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
438 // this.singleReadAlignments = singleReadAlignments;
441 // *********************** CONVENIENCE ***********************************/
444 * Convenience method to add a single read to a consensus sequence
445 * by creating a {@link SingleReadAlignment}.
446 * @param singleRead the {@link SingleRead} to add
447 * @return the created SingleReadAlignment
449 public SingleReadAlignment
addSingleRead(SingleRead singleRead
) {
450 SingleReadAlignment alignment
= SingleReadAlignment
.NewInstance(this, singleRead
);
454 public void removeSingleRead(SingleRead singleRead
) {
455 Set
<SingleReadAlignment
> toRemove
= new HashSet
<SingleReadAlignment
>();
456 for (SingleReadAlignment align
: this.singleReadAlignments
){
457 if (align
.getSingleRead() != null && align
.getSingleRead().equals(singleRead
)){
461 for (SingleReadAlignment align
: toRemove
){
462 removeSingleReadAlignment(align
);
468 * Convenience method that returns all single reads this consensus sequence
469 * is based on via {@link SingleReadAlignment}s.
470 * @return set of related single reads
474 public Set
<SingleRead
> getSingleReads(){
475 Set
<SingleRead
> singleReads
= new HashSet
<SingleRead
>();
476 for (SingleReadAlignment align
: this.singleReadAlignments
){
477 if (align
.getSingleRead() != null){ // == null should not happen
478 singleReads
.add(align
.getSingleRead());
485 //*************************** Transient GETTER /SETTER *****************************/
488 * Delegate method to get the text representation of the consensus sequence
489 * @see #setSequenceString(String)
492 public String
getSequenceString() {
493 return consensusSequence
.getString();
497 * Delegate method to set the text representation of the {@link #getConsensusSequence()
498 * consensus sequence}.
501 public void setSequenceString(String sequence
) {
502 consensusSequence
.setString(sequence
);
506 * Convenience method which computes the set of all related pherograms
507 * @return the set of pherograms.
510 public Set
<Media
> getPherograms(){
511 Set
<Media
> result
= new HashSet
<Media
>();
512 for (SingleReadAlignment singleReadAlign
: singleReadAlignments
){
513 if (singleReadAlign
.getSingleRead() != null && singleReadAlign
.getSingleRead().getPherogram() != null){
514 result
.add(singleReadAlign
.getSingleRead().getPherogram());
521 //***** Registrations ************/
523 * Returns the computed genBank uri.
524 * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
525 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
528 public URI
getGenBankUri() throws URISyntaxException
{
529 return createExternalUri(GENBANK_BASE_URI
, geneticAccessionNumber
);
533 * Returns the computed EMBL uri.
534 * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
535 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
538 public URI
getEmblUri() throws URISyntaxException
{
539 return createExternalUri(EMBL_BASE_URI
, geneticAccessionNumber
);
543 * Returns the computed DDBJ uri.
544 * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
545 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
548 public URI
getDdbjUri() throws URISyntaxException
{
549 return createExternalUri(DDBJ_BASE_URI
, geneticAccessionNumber
);
553 * Returns the URI for the BOLD entry.
554 * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
555 * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
556 * @see #getBoldProcessId()
559 public URI
getBoldUri() throws URISyntaxException
{
560 return createExternalUri(BOLD_BASE_URI
, boldProcessId
);
563 private URI
createExternalUri(String baseUri
, String id
) throws URISyntaxException
{
564 if (StringUtils
.isNotBlank(id
)){
565 return new URI(String
.format(baseUri
, id
.trim()));
574 //*********************** CLONE ********************************************************/
576 * Clones <i>this</i> sequence. This is a shortcut that enables to create
577 * a new instance that differs only slightly from <i>this</i> sequencing by
578 * modifying only some of the attributes.<BR><BR>
581 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
582 * @see java.lang.Object#clone()
585 public Object
clone() {
587 Sequence result
= (Sequence
)super.clone();
590 result
.consensusSequence
= (SequenceString
)this.consensusSequence
.clone();
591 result
.barcodeSequencePart
= (SequenceString
)this.barcodeSequencePart
.clone();
595 result
.singleReadAlignments
= new HashSet
<SingleReadAlignment
>();
596 for (SingleReadAlignment singleReadAlign
: this.singleReadAlignments
){
597 SingleReadAlignment newAlignment
= (SingleReadAlignment
)singleReadAlign
.clone();
598 result
.singleReadAlignments
.add(newAlignment
);
601 //citations //TODO do we really want to copy these ??
602 result
.citations
= new HashSet
<Reference
>();
603 for (Reference
<?
> ref
: this.citations
){
604 result
.citations
.add(ref
);
610 }catch (CloneNotSupportedException e
) {
611 logger
.warn("Object does not implement cloneable");