2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.model
.molecular
;
13 import java
.util
.HashSet
;
16 import javax
.persistence
.Entity
;
17 import javax
.persistence
.FetchType
;
18 import javax
.persistence
.ManyToMany
;
19 import javax
.persistence
.ManyToOne
;
20 import javax
.persistence
.Transient
;
21 import javax
.validation
.constraints
.Size
;
22 import javax
.xml
.bind
.annotation
.XmlAccessType
;
23 import javax
.xml
.bind
.annotation
.XmlAccessorType
;
24 import javax
.xml
.bind
.annotation
.XmlAttribute
;
25 import javax
.xml
.bind
.annotation
.XmlElement
;
26 import javax
.xml
.bind
.annotation
.XmlElementWrapper
;
27 import javax
.xml
.bind
.annotation
.XmlIDREF
;
28 import javax
.xml
.bind
.annotation
.XmlRootElement
;
29 import javax
.xml
.bind
.annotation
.XmlSchemaType
;
30 import javax
.xml
.bind
.annotation
.XmlType
;
32 import org
.apache
.log4j
.Logger
;
33 import org
.codehaus
.plexus
.util
.StringUtils
;
34 import org
.hibernate
.annotations
.Cascade
;
35 import org
.hibernate
.annotations
.CascadeType
;
36 import org
.hibernate
.envers
.Audited
;
37 import org
.hibernate
.search
.annotations
.IndexedEmbedded
;
38 import org
.springframework
.beans
.factory
.annotation
.Configurable
;
40 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
41 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
42 import eu
.etaxonomy
.cdm
.model
.common
.TermType
;
43 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
44 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
47 * Alignment of multiple single sequences to a consensus sequence,
48 * may also include the extracted barcode sequence.
50 * This class holds information about both the combining process of
51 * {@link SingleSequence single sequences} to one consensus sequence
52 * (singleSequences, contigFile) as well as sequence related information.
53 * The latter includes the sequence string itself, important genetic information
54 * (marker, haplotype) as well as registration information (genetic accession number)
55 * citations and barcoding information.
58 * @created 08-Nov-2007 13:06:51
60 * @updated 11-Jul-2013
62 @XmlAccessorType(XmlAccessType
.FIELD
)
63 @XmlType(name
= "Sequence", propOrder
= {
67 "barcodeSequencePart",
69 "geneticAccessionNumber",
76 @XmlRootElement(name
= "Sequencing")
80 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
81 public class Sequence
extends AnnotatableEntity
implements Cloneable
{
82 private static final long serialVersionUID
= 8298983152731241775L;
83 private static final Logger logger
= Logger
.getLogger(Sequence
.class);
85 private static final String GENBANK_BASE_URI
= "http://www.ncbi.nlm.nih.gov/nuccore/%s";
86 private static final String EMBL_BASE_URI
= "http://www.ebi.ac.uk/ena/data/view/%s";
87 private static final String DDBJ_BASE_URI
= "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
88 private static final String BOLD_BASE_URI
= "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
90 @XmlElement( name
= "DnaSample")
92 @XmlSchemaType(name
= "IDREF")
93 @ManyToOne(fetch
= FetchType
.LAZY
)
95 private DnaSample dnaSample
;
98 /** @see #getContigFile() */
99 @XmlElement(name
= "ContigFile")
101 @XmlSchemaType(name
= "IDREF")
102 @ManyToOne(fetch
= FetchType
.LAZY
)
103 private Media contigFile
;
105 /** @see #getConsensusSequence() */
106 @XmlElement(name
= "ConsensusSequence")
107 private SequenceString consensusSequence
= SequenceString
.NewInstance();
109 // /**{@link #getSequence()}*/
110 // @XmlElement(name = "Sequence")
112 // private String sequence;
114 // @XmlElement(name = "Length")
115 // private Integer length;
118 @XmlAttribute(name
= "isBarcode")
119 private Boolean isBarcode
= null;
121 /** @see #getBarcodeSequencePart()*/
122 @XmlElement(name
= "BarcodeSequencePart")
123 private SequenceString barcodeSequencePart
= SequenceString
.NewInstance();
125 /** @see #getGeneticAccessionNumber()*/
126 @XmlElement(name
= "GeneticAccessionNumber")
128 private String geneticAccessionNumber
;
130 /** @see #getBoldProcessId() */
131 @XmlElement(name
= "BoldProcessId")
133 private String boldProcessId
;
135 @XmlElementWrapper(name
= "SingleSequences")
136 @XmlElement(name
= "SingleSequence")
138 @XmlSchemaType(name
= "IDREF")
139 @ManyToMany(fetch
= FetchType
.LAZY
)
140 @Cascade({CascadeType
.SAVE_UPDATE
})
141 private Set
<SingleSequence
> singleSequences
= new HashSet
<SingleSequence
>();
143 /** @see #getMarker() */
144 @XmlElement(name
= "Marker")
146 @XmlSchemaType(name
= "IDREF")
147 @ManyToOne(fetch
= FetchType
.LAZY
)
148 //no cascade as it is a defined term
149 private DefinedTerm marker
;
152 /** @see #getHaplotype() */
153 @XmlElement(name
= "Haplotype")
155 private String haplotype
;
157 /** @see #getCitations() */
158 @XmlElementWrapper(name
= "Citations")
159 @XmlElement(name
= "Citation")
161 @XmlSchemaType(name
= "IDREF")
162 @ManyToMany(fetch
= FetchType
.LAZY
)
163 @Cascade({CascadeType
.SAVE_UPDATE
})
164 private Set
<Reference
> citations
= new HashSet
<Reference
>();
166 // //should be calculated in case sequence is set
167 // @XmlElement (name = "DateSequenced", type= String.class)
168 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
169 // @Type(type="dateTimeUserType")
170 // @Basic(fetch = FetchType.LAZY)
171 // private DateTime dateSequenced;
174 //*********************** FACTORY ****************************************************/
176 public static Sequence
NewInstance(String consensusSequence
){
177 Sequence result
= new Sequence();
178 result
.setSequenceString(consensusSequence
);
182 //*********************** CONSTRUCTOR ****************************************************/
/**
 * No-argument constructor with protected visibility; the static factory
 * {@link #NewInstance(String)} is the public way to obtain an instance.
 */
protected Sequence() {
    //nothing to do, fields are initialized at their declaration
}
186 //*********************** GETTER / SETTER ****************************************************/
190 * The {@link DnaSample dna sample} this sequencing belongs to.
192 public DnaSample
getDnaSample() {
196 //TODO bidirectionality??
198 * @see #getDnaSample()
200 private void setDnaSample(DnaSample dnaSample
) {
201 this.dnaSample
= dnaSample
;
205 * The consensus sequence achieved by this sequencing.
207 public SequenceString
getConsensusSequence() {
208 return consensusSequence
;
213 * @see #getConsensusSequence()
215 public void setConsensusSequence(SequenceString sequenceString
) {
216 if (sequenceString
== null){
217 sequenceString
= SequenceString
.NewInstance();
219 this.consensusSequence
= sequenceString
;
223 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
224 * a barcode sequence. If the barcode sequence is only a part of the consensus sequence
225 * this part is to be stored as {@link #getBarcodeSequencePart() barcode sequence part}.
226 * An isBarcode value of <code>null</code> indicates that we have no knowledge
227 * whether the sequence is a barcoding sequence or not.
229 * @see #getBarcodeSequencePart()
230 * @see #getSequenceString()
231 * @return the isBarcode flag value (tri-state)
234 public Boolean
getIsBarcode() {
239 * @see #getIsBarcode()
240 * @see #getBarcodeSequencePart()
242 public void setIsBarcode(Boolean isBarcode
) {
243 this.isBarcode
= isBarcode
;
247 * If the barcode sequence string does not include 100% of the (consensus) sequence
248 * the part used as barcode is provided here. However, the barcode part
249 * should be kept if consensus sequence string and barcode sequence string are equal.
251 * @see #getIsBarcode()
253 public SequenceString
getBarcodeSequencePart() {
254 return barcodeSequencePart
;
258 * @see #getBarcodeSequencePart()
260 public void setBarcodeSequence(SequenceString barcodeSequencePart
) {
261 if (barcodeSequencePart
== null){
262 barcodeSequencePart
= SequenceString
.NewInstance();
264 this.barcodeSequencePart
= barcodeSequencePart
;
268 * Returns the {@link TermType#DnaMarker marker} examined and described by this sequencing.
271 public DefinedTerm
getMarker(){
279 public void setMarker(DefinedTerm marker
){
280 this.marker
= marker
;
284 * The accession number used in GenBank, EMBL and DDBJ.
287 public String
getGeneticAccessionNumber() {
288 return geneticAccessionNumber
;
292 * Sets the genetic accession number.
293 * @see #getGeneticAccessionNumber()
295 public void setGeneticAccessionNumber(String geneticAccessionNumber
) {
296 this.geneticAccessionNumber
= geneticAccessionNumber
;
301 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
303 public String
getBoldProcessId() {
304 return boldProcessId
;
307 public void setBoldProcessId(String boldProcessId
) {
308 this.boldProcessId
= boldProcessId
;
312 * Returns the name of the haplotype.
313 * A haplotype (haploid genotype) is a variant of nucleotide sequences on the same chromosome.
314 * A certain haplotype may be specific for an individual, a population or a species.
317 public String
getHaplotype() {
322 * @see #getHaplotype()
324 public void setHaplotype(String haplotype
) {
325 this.haplotype
= haplotype
;
329 * The contigFile containing all data and data processing for this sequencing.
331 public Media
getContigFile() {
336 * @see #getContigFile()
338 public void setContigFile(Media contigFile
) {
339 this.contigFile
= contigFile
;
344 * Citations are the set of references in which this sequence was published.
345 * Unlike taxonomic names the first publication of a sequence
346 * is not so important (maybe because it is required by publishers
347 * that they are all registered at Genbank) therefore we do not have something like an
348 * "original reference" attribute.<BR>
349 * Links to these references are to be stored within the reference itself.
350 * @return the set of references in which this sequence was published.
352 public Set
<Reference
> getCitations() {
356 * @see #getCitations()
358 protected void setCitations(Set
<Reference
> citations
) {
359 this.citations
= citations
;
362 * @see #getCitations()
364 public void addCitation(Reference citation
) {
365 this.citations
.add(citation
);
368 * @see #getCitations()
370 public void removeCitation(Reference citation
) {
371 this.citations
.remove(citation
);
375 * The single sequences that were used to create this consensus sequence.
377 public Set
<SingleSequence
> getSingleSequences() {
378 return singleSequences
;
381 * @see #getSingleSequences()
383 public void addSingleSquence(SingleSequence singleSequence
) {
384 this.singleSequences
.add(singleSequence
);
387 * @see #getSingleSequences()
389 public void removeSingleSquence(SingleSequence singleSequence
) {
390 this.singleSequences
.remove(singleSequence
);
393 * @see #getSingleSequences()
395 //TODO private as long it is unclear how bidirectionality is handled
396 private void setSingleSequences(Set
<SingleSequence
> singleSequences
) {
397 this.singleSequences
= singleSequences
;
401 //*************************** Transient GETTER /SETTER *****************************/
404 * Delegate method to get the text representation of the consensus sequence
405 * @see #setSequenceString(String)
408 public String
getSequenceString() {
409 return consensusSequence
.getString();
413 * Delegate method to set the text representation of the {@link #getConsensusSequence()
414 * consensus sequence}.
417 public void setSequenceString(String sequence
) {
418 consensusSequence
.setString(sequence
);
422 * Convenience method which computes the set of all related pherograms
423 * @return the set of pherograms.
426 public Set
<Media
> getPherograms(){
427 Set
<Media
> result
= new HashSet
<Media
>();
428 for (SingleSequence singleSeq
: singleSequences
){
429 if (singleSeq
.getPherogram() != null){
430 result
.add(singleSeq
.getPherogram());
437 //***** Registrations ************/
439 * Returns the computed genBank uri.
443 public URI
getGenBankUri() {
444 return createExternalUri(GENBANK_BASE_URI
);
448 * Returns the computed EMBL uri.
452 public URI
getEmblUri() {
453 return createExternalUri(EMBL_BASE_URI
);
457 * Returns the computed DDBJ uri.
461 public URI
getDdbjUri() {
462 return createExternalUri(DDBJ_BASE_URI
);
466 * Returns the URI for the BOLD entry.
467 * @see #getBoldProcessId()
470 public URI
getBoldUri() {
471 return createExternalUri(BOLD_BASE_URI
);
475 private URI
createExternalUri(String baseUri
){
476 if (StringUtils
.isNotBlank(geneticAccessionNumber
)){
477 return URI
.create(String
.format(baseUri
, geneticAccessionNumber
.trim()));
484 //*********************** CLONE ********************************************************/
486 * Clones <i>this</i> sequence. This is a shortcut that enables to create
487 * a new instance that differs only slightly from <i>this</i> sequencing by
488 * modifying only some of the attributes.<BR><BR>
491 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
492 * @see java.lang.Object#clone()
495 public Object
clone() {
497 Sequence result
= (Sequence
)super.clone();
500 result
.consensusSequence
= (SequenceString
)this.consensusSequence
.clone();
501 result
.barcodeSequencePart
= (SequenceString
)this.barcodeSequencePart
.clone();
505 result
.singleSequences
= new HashSet
<SingleSequence
>();
506 for (SingleSequence seq
: this.singleSequences
){
507 result
.singleSequences
.add((SingleSequence
) seq
);
510 //citations //TODO do we really want to copy these ??
511 result
.citations
= new HashSet
<Reference
>();
512 for (Reference ref
: this.citations
){
513 result
.citations
.add((Reference
) ref
);
519 }catch (CloneNotSupportedException e
) {
520 logger
.warn("Object does not implement cloneable");