* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
-
package eu.etaxonomy.cdm.model.molecular;
-import eu.etaxonomy.cdm.model.media.IMediaDocumented;
-import eu.etaxonomy.cdm.model.media.Media;
-import eu.etaxonomy.cdm.model.reference.ReferenceBase;
-import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
-import eu.etaxonomy.cdm.model.common.IReferencedEntity;
-import org.apache.log4j.Logger;
-import org.hibernate.annotations.Cascade;
-import org.hibernate.annotations.CascadeType;
-import org.hibernate.annotations.Index;
-import org.hibernate.annotations.Table;
-import org.hibernate.envers.Audited;
-
-import java.util.*;
+import java.net.URI;
+import java.util.HashSet;
+import java.util.Set;
-import javax.persistence.*;
+import javax.persistence.Entity;
+import javax.persistence.FetchType;
+import javax.persistence.ManyToMany;
+import javax.persistence.ManyToOne;
+import javax.persistence.Transient;
+import javax.validation.constraints.Size;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlSchemaType;
import javax.xml.bind.annotation.XmlType;
+import org.apache.log4j.Logger;
+import org.codehaus.plexus.util.StringUtils;
+import org.hibernate.annotations.Cascade;
+import org.hibernate.annotations.CascadeType;
+import org.hibernate.envers.Audited;
+import org.hibernate.search.annotations.IndexedEmbedded;
+import org.springframework.beans.factory.annotation.Configurable;
+
+import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
+import eu.etaxonomy.cdm.model.common.DefinedTerm;
+import eu.etaxonomy.cdm.model.common.TermType;
+import eu.etaxonomy.cdm.model.media.Media;
+import eu.etaxonomy.cdm.model.reference.Reference;
+
/**
+ * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
+ * This sequence is a part of (or the complete) DNA sequence of the related {@link DnaSample DNA Sample}.
+ *
+ * <BR>This class holds information about both the combining process of
+ * {@link SingleRead single sequences} to one consensus sequence
+ * ({@link #getSingleReads() singleReads}, {@link #getContigFile() contigFile})
+ * as well as sequence related information.
+ * The latter includes the {@link #getConsensusSequence() sequence string} itself,
+ * important genetic information about the DNA that has been sequenced
+ * ({@link #getDnaMarker() marker}, {@link #getHaplotype() haplotype}) as well as
+ * registration information ({@link #getGeneticAccessionNumber() genetic accession number}),
+ * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
+ * {@link #getBarcodeSequencePart() barcode sequence}, ...).
+ *
* @author m.doering
- * @version 1.0
* @created 08-Nov-2007 13:06:51
+ * @author a.mueller
+ * @updated 11-Jul-2013
*/
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "Sequence", propOrder = {
- "sequence",
- "length",
- "dateSequenced",
- "barcode",
- "citationMicroReference",
- "publishedIn",
- "locus",
- "citations",
- "genBankAccession",
- "chromatograms"
+ "dnaSample",
+ "consensusSequence",
+ "isBarcode",
+ "barcodeSequencePart",
+ "dnaMarker",
+ "geneticAccessionNumber",
+ "boldProcessId",
+ "haplotype",
+ "contigFile",
+ "singleReads",
+ "citations"
})
-@XmlRootElement(name = "Sequence")
+@XmlRootElement(name = "Sequencing")
@Entity
@Audited
-@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
-public class Sequence extends IdentifiableEntity implements IReferencedEntity, IMediaDocumented{
+@Configurable
+//@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
+public class Sequence extends AnnotatableEntity implements Cloneable{
private static final long serialVersionUID = 8298983152731241775L;
private static final Logger logger = Logger.getLogger(Sequence.class);
- //the sequence as a string of base pairs. 5'->3'
- @XmlElement(name = "Sequence")
- private String sequence;
+ //TODO move to cdmlib-ext?
+ private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
+ private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
+ private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
+ private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
- //should be calculated in case sequence is set
- @XmlElement(name = "Length")
- private Integer length;
+ @XmlElement( name = "DnaSample")
+ @XmlIDREF
+ @XmlSchemaType(name = "IDREF")
+ @ManyToOne(fetch = FetchType.LAZY)
+ @IndexedEmbedded
+ private DnaSample dnaSample;
+
- //should be calculated in case sequence is set
- @XmlElement(name = "DateSequenced")
- @Temporal(TemporalType.DATE)
- private Calendar dateSequenced;
+ /** @see #getContigFile() */
+ @XmlElement(name = "ContigFile")
+ @XmlIDREF
+ @XmlSchemaType(name = "IDREF")
+ @ManyToOne(fetch = FetchType.LAZY)
+ private Media contigFile;
+
+ /** @see #getConsensusSequence() */
+ @XmlElement(name = "ConsensusSequence")
+ private SequenceString consensusSequence = SequenceString.NewInstance();
- //should be calculated in case sequence is set
@XmlAttribute(name = "isBarcode")
- private boolean barcode;
+ private Boolean isBarcode = null;
- //the sequence as a string of base pairs. 5'->3'
- @XmlElement(name = "CitationMicroReference")
- private String citationMicroReference;
+ /** @see #getBarcodeSequencePart() */
+ @XmlElement(name = "BarcodeSequencePart")
+ private SequenceString barcodeSequencePart = SequenceString.NewInstance();
+
+ /** @see #getGeneticAccessionNumber()*/
+ @XmlElement(name = "GeneticAccessionNumber")
+ @Size(max=20)
+ private String geneticAccessionNumber;
+
+ /** @see #getBoldProcessId() */
+ @XmlElement(name = "BoldProcessId")
+ @Size(max=20)
+ private String boldProcessId;
- @XmlElement(name = "PublishedIn")
+ @XmlElementWrapper(name = "SingleReads")
+ @XmlElement(name = "SingleRead")
+ @XmlIDREF
+ @XmlSchemaType(name = "IDREF")
+ @ManyToMany(fetch = FetchType.LAZY)
+ @Cascade({CascadeType.SAVE_UPDATE})
+ private Set<SingleRead> singleReads = new HashSet<SingleRead>();
+
+ /** @see #getDnaMarker() */
+ @XmlElement(name = "DnaMarker")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
@ManyToOne(fetch = FetchType.LAZY)
- @Cascade(CascadeType.SAVE_UPDATE)
- private ReferenceBase publishedIn;
+ //no cascade as it is a defined term
+ private DefinedTerm dnaMarker;
+
+
+ /** @see #getHaplotype() */
+ @XmlElement(name = "Haplotype")
+ @Size(max=100)
+ private String haplotype;
+ /** @see #getCitations() */
@XmlElementWrapper(name = "Citations")
- @XmlElement(name = "Citation")
+ @XmlElement(name = "Citation")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
- @OneToMany(fetch = FetchType.LAZY)
- private Set<ReferenceBase> citations = new HashSet<ReferenceBase>();
+ @ManyToMany(fetch = FetchType.LAZY)
+ @Cascade({CascadeType.SAVE_UPDATE})
+ private Set<Reference> citations = new HashSet<Reference>();
- @XmlElementWrapper(name = "GenBankAccessions")
- @XmlElement(name = "GenBankAccession")
- @OneToMany(fetch = FetchType.LAZY)
- private Set<GenBankAccession> genBankAccession = new HashSet<GenBankAccession>();
+// //should be calculated in case sequence is set
+// @XmlElement (name = "DateSequenced", type= String.class)
+// @XmlJavaTypeAdapter(DateTimeAdapter.class)
+// @Type(type="dateTimeUserType")
+// @Basic(fetch = FetchType.LAZY)
+// private DateTime dateSequenced;
- @XmlElement(name = "Locus")
- @XmlIDREF
- @XmlSchemaType(name = "IDREF")
- @ManyToOne(fetch = FetchType.LAZY)
- @Cascade(CascadeType.SAVE_UPDATE)
- private Locus locus;
- @XmlElementWrapper(name = "Chromatograms")
- @XmlElement(name = "Chromatogram")
- @XmlIDREF
- @XmlSchemaType(name = "IDREF")
- @OneToMany(fetch = FetchType.LAZY)
- private Set<Media> chromatograms = new HashSet<Media>();
+//*********************** FACTORY ****************************************************/
- public Locus getLocus(){
- logger.debug("getLocus");
- return this.locus;
+ /**
+ * Creates a new sequence and sets the text of its consensus sequence.
+ *
+ * @param consensusSequence the sequence text, handed to {@link #setSequenceString(String)}
+ * @return the newly created sequence
+ */
+ public static Sequence NewInstance(String consensusSequence){
+ Sequence result = new Sequence();
+ result.setSequenceString(consensusSequence);
+ return result;
 }
-
- public void setLocus(Locus locus){
- this.locus = locus;
+
+
+ /**
+ * Creates a new sequence via {@link #NewInstance(String)} and additionally sets
+ * the given length on its {@link #getConsensusSequence() consensus sequence}.
+ *
+ * @param consensusSequence the sequence text
+ * @param length the length to store on the consensus sequence
+ * @return the newly created sequence
+ */
+ public static Sequence NewInstance(String consensusSequence, Integer length){
+ Sequence result = NewInstance(consensusSequence);
+ result.getConsensusSequence().setLength(length);
+ return result;
 }
+//*********************** CONSTRUCTOR ****************************************************/
+
+ /**
+ * Non-public no-argument constructor; the factory methods of this class use it to create instances.
+ */
+ protected Sequence() {}
- public ReferenceBase getPublishedIn(){
- return this.publishedIn;
- }
+//*********************** GETTER / SETTER ****************************************************/
- public void setPublishedIn(ReferenceBase publishedIn){
- this.publishedIn = publishedIn;
+
+ /**
+ * The {@link DnaSample dna sample} this sequencing belongs to.
+ *
+ * @return the DNA sample
+ */
+ public DnaSample getDnaSample() {
+ return dnaSample;
 }
- public Set<ReferenceBase> getCitations() {
- return citations;
+ //TODO bidirectionality??
+ /**
+ * Sets the DNA sample this sequencing belongs to.
+ * NOTE(review): presumably private because the handling of the bidirectional
+ * relation is still open (see TODO above) - confirm.
+ *
+ * @param dnaSample the DNA sample to assign
+ * @see #getDnaSample()
+ */
+ private void setDnaSample(DnaSample dnaSample) {
+ this.dnaSample = dnaSample;
 }
- protected void setCitations(Set<ReferenceBase> citations) {
- this.citations = citations;
+
+ /**
+ * The resulting consensus sequence represented by this {@link Sequence sequence}.
+ * The consensus is usually computed from the {@link SingleRead single reads}.
+ * The result of which is stored in a file called {@link #getContigFile() contig file}.
+ *
+ * @return the consensus sequence
+ * @see #getContigFile()
+ * @see #getSingleReads()
+ */
+ public SequenceString getConsensusSequence() {
+ return consensusSequence;
 }
- public void addCitation(ReferenceBase citation) {
- this.citations.add(citation);
+
+
+ /**
+ * Replaces the consensus sequence. If <code>null</code> is passed, a new instance
+ * obtained from <code>SequenceString.NewInstance()</code> is stored instead, so the
+ * field is not set to <code>null</code> by this method.
+ *
+ * @param sequenceString the new consensus sequence, may be <code>null</code>
+ * @see #getConsensusSequence()
+ */
+ public void setConsensusSequence(SequenceString sequenceString) {
+     this.consensusSequence = (sequenceString != null) ? sequenceString : SequenceString.NewInstance();
 }
- public void removeCitation(ReferenceBase citation) {
- this.citations.remove(citation);
+
+ /**
+ * The isBarcode flag should be set to true if this (consensus) sequence is or includes
+ * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
+ * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
+ * An isBarcode value of <code>null</code> indicates that we have no knowledge
+ * whether the sequence is a barcoding sequence or not.
+ *
+ * @return the isBarcode flag value (tri-state)
+ * @see #getBarcodeSequencePart()
+ * @see #getSequenceString()
+ */
+ public Boolean getIsBarcode() {
+ return isBarcode;
 }
- public Set<GenBankAccession> getGenBankAccession() {
- return genBankAccession;
+ /**
+ * Sets the isBarcode flag.
+ *
+ * @param isBarcode the new flag value; it is stored as given, including <code>null</code>
+ * @see #getIsBarcode()
+ * @see #getBarcodeSequencePart()
+ */
+ public void setIsBarcode(Boolean isBarcode) {
+ this.isBarcode = isBarcode;
 }
- public void addGenBankAccession(GenBankAccession genBankAccession) {
- this.genBankAccession.add(genBankAccession);
+ /**
+ * If the barcode sequence string does not include 100% of the (consensus) sequence
+ * the part used as barcode is provided here. However, the barcode part
+ * should be kept empty if consensus sequence string and barcode sequence string are equal.
+ *
+ * @return the barcode sequence part
+ * @see #getIsBarcode()
+ */
+ public SequenceString getBarcodeSequencePart() {
+ return barcodeSequencePart;
 }
-
- public void removeGenBankAccession(GenBankAccession genBankAccession) {
- this.genBankAccession.remove(genBankAccession);
+
+ /**
+ * Replaces the barcode sequence part. If <code>null</code> is passed, a new instance
+ * obtained from <code>SequenceString.NewInstance()</code> is stored instead, so the
+ * field is not set to <code>null</code> by this method.
+ *
+ * @param barcodeSequencePart the new barcode sequence part, may be <code>null</code>
+ * @see #getBarcodeSequencePart()
+ */
+ public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
+     this.barcodeSequencePart = (barcodeSequencePart != null) ? barcodeSequencePart : SequenceString.NewInstance();
 }
- public Set<Media> getChromatograms() {
- return chromatograms;
+ /**
+ * Returns the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
+ * The marker should usually be similar to the one used in the according {@link Amplification
+ * amplification process}. However, it may slightly differ, or, if multiple amplifications were
+ * used to build this consensus sequence it may be the super set of the markers used in amplification.
+ *
+ * @return the DNA marker
+ */
+ public DefinedTerm getDnaMarker(){
+ return this.dnaMarker;
+ }
+
+ /**
+ * Sets the DNA marker.
+ *
+ * @see #getDnaMarker()
+ * @param dnaMarker the DNA marker to assign
+ */
+ public void setDnaMarker(DefinedTerm dnaMarker){
+ this.dnaMarker = dnaMarker;
 }
- public void addChromatogram(Media chromatogram) {
- this.chromatograms.add(chromatogram);
+ /**
+ * The accession number used in GenBank, EMBL and DDBJ.
+ *
+ * @return the genetic accession number
+ */
+ public String getGeneticAccessionNumber() {
+ return geneticAccessionNumber;
 }
-
- public void removeChromatogram(Media chromatogram) {
- this.chromatograms.remove(chromatogram);
+
+ /**
+ * Sets the genetic accession number.
+ *
+ * @param geneticAccessionNumber the accession number to store
+ * @see #getGeneticAccessionNumber()
+ */
+ public void setGeneticAccessionNumber(String geneticAccessionNumber) {
+ this.geneticAccessionNumber = geneticAccessionNumber;
 }
- public Set<Media> getMedia() {
- return getChromatograms();
+
+ /**
+ * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
+ *
+ * @return the BOLD process id
+ */
+ public String getBoldProcessId() {
+ return boldProcessId;
 }
- public String getSequence(){
- return this.sequence;
+ /**
+ * Sets the BOLD process id.
+ *
+ * @param boldProcessId the BOLD process id to store
+ * @see #getBoldProcessId()
+ */
+ public void setBoldProcessId(String boldProcessId) {
+ this.boldProcessId = boldProcessId;
 }
 /**
- *
- * @param sequence sequence
+ * Returns the name of the haplotype.
+ * A haplotype (haploid genotype) is a variant of nucleotide sequences on the same chromosome.
+ * A certain haplotype may be specific for an individual, a population or a species.
+ * @return the haplotype name
 */
- public void setSequence(String sequence){
- this.sequence = sequence;
+ public String getHaplotype() {
+ return haplotype;
 }
- public Integer getLength(){
- return this.length;
+ /**
+ * Sets the name of the haplotype.
+ *
+ * @param haplotype the haplotype name to store
+ * @see #getHaplotype()
+ */
+ public void setHaplotype(String haplotype) {
+ this.haplotype = haplotype;
 }
 /**
+ * The contigFile containing all data and data processing for this sequencing.
 *
- * @param length length
+ * @return the contig file
+ * @see #getConsensusSequence()
+ * @see #getSingleReads()
 */
- public void setLength(Integer length){
- this.length = length;
+ public Media getContigFile() {
+ return contigFile;
 }
- public Calendar getDateSequenced(){
- return this.dateSequenced;
+ /**
+ * Sets the contig file.
+ *
+ * @param contigFile the contig file to assign
+ * @see #getContigFile()
+ */
+ public void setContigFile(Media contigFile) {
+ this.contigFile = contigFile;
+ }
+
+
+ /**
+ * Citations are the set of references in which this sequence was published.
+ * Unlike taxonomic names the first publication of a sequence
+ * is not so important (maybe because it is required by publishers
+ * that they are all registered at Genbank) therefore we do not have something like an
+ * "original reference" attribute.<BR>
+ * Links to these references are to be stored within the reference itself.<BR>
+ * The set held by this object is returned directly, not a copy.
+ * @return the set of references in which this sequence was published.
+ */
+ public Set<Reference> getCitations() {
+ return citations;
+ }
+ /**
+ * Replaces the complete set of citations by the given set.
+ *
+ * @param citations the new citation set, stored as given
+ * @see #getCitations()
+ */
+ protected void setCitations(Set<Reference> citations) {
+ this.citations = citations;
+ }
+ /**
+ * Adds a reference to the set of citations.
+ *
+ * @param citation the reference to add
+ * @see #getCitations()
+ */
+ public void addCitation(Reference citation) {
+ this.citations.add(citation);
+ }
+ /**
+ * Removes a reference from the set of citations.
+ *
+ * @param citation the reference to remove
+ * @see #getCitations()
+ */
+ public void removeCitation(Reference citation) {
+ this.citations.remove(citation);
 }
 /**
+ * The {@link SingleRead single reads} that were used to build this consensus sequence.
+ * The set held by this object is returned directly, not a copy.
 *
- * @param dateSequenced dateSequenced
+ * @return the set of single reads
+ * @see #getConsensusSequence()
+ * @see #getContigFile()
 */
- public void setDateSequenced(Calendar dateSequenced){
- this.dateSequenced = dateSequenced;
+ public Set<SingleRead> getSingleReads() {
+ return singleReads;
 }
+ /**
+ * Adds a single read to the set of single reads.
+ *
+ * @param singleRead the single read to add
+ * @see #getSingleReads()
+ */
+ public void addSingleRead(SingleRead singleRead) {
+ this.singleReads.add(singleRead);
+ }
+ /**
+ * Removes a single read from the set of single reads.
+ *
+ * @param singleRead the single read to remove
+ * @see #getSingleReads()
+ */
+ public void removeSingleRead(SingleRead singleRead) {
+ this.singleReads.remove(singleRead);
+ }
+ /**
+ * Replaces the complete set of single reads by the given set.
+ *
+ * @param singleReads the new set of single reads, stored as given
+ * @see #getSingleReads()
+ */
+ //TODO private as long it is unclear how bidirectionality is handled
+ private void setSingleReads(Set<SingleRead> singleReads) {
+ this.singleReads = singleReads;
+ }
+
+
+ //*************************** Transient GETTER /SETTER *****************************/
- public boolean isBarcode(){
- return this.barcode;
+ /**
+ * Delegate method to get the text representation of the consensus sequence.
+ *
+ * @return the result of <code>getString()</code> called on the consensus sequence
+ * @see #setSequenceString(String)
+ */
+ @Transient
+ public String getSequenceString() {
+ return consensusSequence.getString();
 }
 /**
- *
- * @param isBarcode isBarcode
+ * Delegate method to set the text representation of the {@link #getConsensusSequence()
+ * consensus sequence}.
+ *
+ * @param sequence the text passed to <code>setString</code> of the consensus sequence
+ * @see #getSequenceString()
 */
- public void setBarcode(boolean barcode){
- this.barcode = barcode;
+ @Transient
+ public void setSequenceString(String sequence) {
+ consensusSequence.setString(sequence);
 }
+
+ /**
+ * Convenience method collecting the pherograms of all {@link #getSingleReads() single reads}.
+ * Single reads without a pherogram are skipped. A new set is created on each call.
+ *
+ * @return the set of pherograms.
+ */
+ @Transient
+ public Set<Media> getPherograms(){
+     Set<Media> pherograms = new HashSet<Media>();
+     for (SingleRead read : singleReads){
+         if (read.getPherogram() == null){
+             //nothing to collect for this read
+             continue;
+         }
+         pherograms.add(read.getPherogram());
+     }
+     return pherograms;
+ }
+
- public String getCitationMicroReference(){
- return this.citationMicroReference;
+ //***** Registrations ************/
+ /**
+ * Returns the computed genBank uri.
+ *
+ * @return the URI built from the GenBank base URI and the genetic accession number,
+ * or <code>null</code> if the accession number is blank
+ * @see #getGeneticAccessionNumber()
+ */
+ @Transient
+ public URI getGenBankUri() {
+ return createExternalUri(GENBANK_BASE_URI);
 }
 /**
- *
- * @param citationMicroReference citationMicroReference
+ * Returns the computed EMBL uri.
+ * @return the URI built from the EMBL base URI and the genetic accession number,
+ * or <code>null</code> if the accession number is blank
 */
- public void setCitationMicroReference(String citationMicroReference){
- this.citationMicroReference = citationMicroReference;
+ @Transient
+ public URI getEmblUri() {
+ return createExternalUri(EMBL_BASE_URI);
 }
- @Override
- public String generateTitle(){
- return "";
+ /**
+ * Returns the computed DDBJ uri.
+ *
+ * @return the URI built from the DDBJ base URI and the genetic accession number,
+ * or <code>null</code> if the accession number is blank
+ * @see #getGeneticAccessionNumber()
+ */
+ @Transient
+ public URI getDdbjUri() {
+ return createExternalUri(DDBJ_BASE_URI);
+ }
+
+ /**
+ * Returns the URI for the BOLD entry. The URI is built by inserting the trimmed
+ * {@link #getBoldProcessId() BOLD process id} into the BOLD record view URL.
+ * <BR>
+ * The BOLD URL is parameterized by <code>processid</code>, therefore the BOLD process id
+ * is used here and not the genetic accession number used for GenBank, EMBL and DDBJ.
+ *
+ * @return the BOLD URI, or <code>null</code> if the BOLD process id is blank
+ * @see #getBoldProcessId()
+ */
+ @Transient
+ public URI getBoldUri() {
+     if (StringUtils.isNotBlank(boldProcessId)){
+         return URI.create(String.format(BOLD_BASE_URI, boldProcessId.trim()));
+     }
+     return null;
 }
- public ReferenceBase getCitation(){
- return publishedIn;
+
+ /**
+ * Builds an external URI by inserting the trimmed genetic accession number
+ * into the given format string.
+ *
+ * @param baseUri format string with one <code>%s</code> placeholder for the accession number
+ * @return the resulting URI, or <code>null</code> if the genetic accession number is blank
+ */
+ private URI createExternalUri(String baseUri){
+     boolean hasAccessionNumber = StringUtils.isNotBlank(geneticAccessionNumber);
+     if (!hasAccessionNumber){
+         return null;
+     }
+     String uriString = String.format(baseUri, geneticAccessionNumber.trim());
+     return URI.create(uriString);
+ }
+
+
+ //*********************** CLONE ********************************************************/
+ /**
+ * Clones <i>this</i> sequence. This is a shortcut that enables to create
+ * a new instance that differs only slightly from <i>this</i> sequence by
+ * modifying only some of the attributes.<BR><BR>
+ *
+ * The consensus sequence and the barcode sequence part are cloned themselves.
+ * The sets of single reads and citations are replaced by new sets containing the
+ * same elements (the elements are not cloned). All other attributes are
+ * taken over as delivered by <code>super.clone()</code>.
+ *
+ * @return the clone, or <code>null</code> if <code>super.clone()</code> throws
+ * a {@link CloneNotSupportedException}
+ * @see java.lang.Object#clone()
+ */
+ @Override
+ public Object clone() {
+     try{
+         Sequence result = (Sequence)super.clone();
+
+         //sequences
+         result.consensusSequence = (SequenceString)this.consensusSequence.clone();
+         result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
+
+         //single reads: new set, same elements
+         result.singleReads = new HashSet<SingleRead>(this.singleReads);
+
+         //citations //TODO do we really want to copy these ??
+         result.citations = new HashSet<Reference>(this.citations);
+
+         return result;
+     }catch (CloneNotSupportedException e) {
+         //pass the exception to the logger so the stack trace ends up in the log, not on stderr
+         logger.warn("Object does not implement cloneable", e);
+         return null;
+     }
 }
+
}
\ No newline at end of file