X-Git-Url: https://dev.e-taxonomy.eu/gitweb/cdmlib.git/blobdiff_plain/ee91bcd914145f20130803b468f77d0d897fe65c..9b842959da4064858071d11447383ad745ad14e6:/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java diff --git a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java index b67242baf8..59183ef3bb 100644 --- a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java +++ b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java @@ -6,25 +6,19 @@ * The contents of this file are subject to the Mozilla Public License Version 1.1 * See LICENSE.TXT at the top of this package for the full license terms. */ - package eu.etaxonomy.cdm.model.molecular; -import eu.etaxonomy.cdm.model.media.IMediaDocumented; -import eu.etaxonomy.cdm.model.media.Media; -import eu.etaxonomy.cdm.model.reference.ReferenceBase; -import eu.etaxonomy.cdm.model.common.IdentifiableEntity; -import eu.etaxonomy.cdm.model.common.IReferencedEntity; -import org.apache.log4j.Logger; -import org.hibernate.annotations.Cascade; -import org.hibernate.annotations.CascadeType; -import org.hibernate.annotations.Index; -import org.hibernate.annotations.Table; -import org.hibernate.envers.Audited; - -import java.util.*; +import java.net.URI; +import java.util.HashSet; +import java.util.Set; -import javax.persistence.*; +import javax.persistence.Entity; +import javax.persistence.FetchType; +import javax.persistence.ManyToMany; +import javax.persistence.ManyToOne; +import javax.persistence.Transient; +import javax.validation.constraints.Size; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlAttribute; @@ -35,211 +29,517 @@ import javax.xml.bind.annotation.XmlRootElement; import javax.xml.bind.annotation.XmlSchemaType; import javax.xml.bind.annotation.XmlType; +import org.apache.log4j.Logger; +import org.codehaus.plexus.util.StringUtils; +import org.hibernate.annotations.Cascade; +import org.hibernate.annotations.CascadeType; +import org.hibernate.envers.Audited; +import org.hibernate.search.annotations.IndexedEmbedded; +import org.springframework.beans.factory.annotation.Configurable; + +import eu.etaxonomy.cdm.model.common.AnnotatableEntity; +import eu.etaxonomy.cdm.model.common.DefinedTerm; +import eu.etaxonomy.cdm.model.common.TermType; +import eu.etaxonomy.cdm.model.media.Media; +import eu.etaxonomy.cdm.model.reference.Reference; + /** + * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence. + * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample}, + * while + * + *
This class holds information about both the combining process of + * {@link SingleRead single sequences} to one consensus sequence + * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} ) + * as well as sequence related information. + * The later includes the {@link #getConsensusSequence() sequence string} itself, + * important genetic information about the DNA that has been sequenced + * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as + * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ), + * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id}, + * {@link #getBarcodeSequencePart() barcode sequence}, ...). + * * @author m.doering - * @version 1.0 * @created 08-Nov-2007 13:06:51 + * @author a.mueller + * @updated 11-Jul-2013 */ @XmlAccessorType(XmlAccessType.FIELD) @XmlType(name = "Sequence", propOrder = { - "sequence", - "length", - "dateSequenced", - "barcode", - "citationMicroReference", - "publishedIn", - "locus", - "citations", - "genBankAccession", - "chromatograms" + "dnaSample", + "consensusSequence", + "isBarcode", + "barcodeSequencePart", + "dnaMarker", + "geneticAccessionNumber", + "boldProcessId", + "haplotype", + "contigFile", + "singleReads", + "citations" }) -@XmlRootElement(name = "Sequence") +@XmlRootElement(name = "Sequencing") @Entity @Audited -@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) }) -public class Sequence extends IdentifiableEntity implements IReferencedEntity, IMediaDocumented{ +@Configurable +//@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) }) +public class Sequence extends AnnotatableEntity implements Cloneable{ private static final long serialVersionUID = 8298983152731241775L; private static final Logger logger = Logger.getLogger(Sequence.class); - //the sequence as a string of base pairs. 5'->3' - @XmlElement(name = "Sequence") - private String sequence; + //TODO move to cdmlib-ext? + private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s"; + private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s"; + private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html"; + private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s"; - //should be calculated in case sequence is set - @XmlElement(name = "Length") - private Integer length; + @XmlElement( name = "DnaSample") + @XmlIDREF + @XmlSchemaType(name = "IDREF") + @ManyToOne(fetch = FetchType.LAZY) + @IndexedEmbedded + private DnaSample dnaSample; + - //should be calculated in case sequence is set - @XmlElement(name = "DateSequenced") - @Temporal(TemporalType.DATE) - private Calendar dateSequenced; + /** @see #getContigFile() */ + @XmlElement(name = "ContigFile") + @XmlIDREF + @XmlSchemaType(name = "IDREF") + @ManyToOne(fetch = FetchType.LAZY) + private Media contigFile; + + /** @see #getConsensusSequence() */ + @XmlElement(name = "ConsensusSequence") + private SequenceString consensusSequence = SequenceString.NewInstance(); - //should be calculated in case sequence is set @XmlAttribute(name = "isBarcode") - private boolean barcode; + private Boolean isBarcode = null; - //the sequence as a string of base pairs. 5'->3' - @XmlElement(name = "CitationMicroReference") - private String citationMicroReference; + /** @see #getBarcodeSequence()*/ + @XmlElement(name = "BarcodeSequencePart") + private SequenceString barcodeSequencePart = SequenceString.NewInstance(); + + /** @see #getGeneticAccessionNumber()*/ + @XmlElement(name = "GeneticAccessionNumber") + @Size(max=20) + private String geneticAccessionNumber; + + /** @see #getBoldProcessId() */ + @XmlElement(name = "BoldProcessId") + @Size(max=20) + private String boldProcessId; - @XmlElement(name = "PublishedIn") + @XmlElementWrapper(name = "SingleReads") + @XmlElement(name = "SingleRead") + @XmlIDREF + @XmlSchemaType(name = "IDREF") + @ManyToMany(fetch = FetchType.LAZY) + @Cascade({CascadeType.SAVE_UPDATE}) + private Set singleReads = new HashSet(); + + /** @see #getDnaMarker() */ + @XmlElement(name = "DnaMarker") @XmlIDREF @XmlSchemaType(name = "IDREF") @ManyToOne(fetch = FetchType.LAZY) - @Cascade(CascadeType.SAVE_UPDATE) - private ReferenceBase publishedIn; + //no cascade as it is a defined term + private DefinedTerm dnaMarker; + + + /** @see #getHaplotype() */ + @XmlElement(name = "Haplotype") + @Size(max=100) + private String haplotype; + /** @see #getCitations() */ @XmlElementWrapper(name = "Citations") - @XmlElement(name = "Citation") + @XmlElement(name = "Citation") @XmlIDREF @XmlSchemaType(name = "IDREF") - @OneToMany(fetch = FetchType.LAZY) - private Set citations = new HashSet(); + @ManyToMany(fetch = FetchType.LAZY) + @Cascade({CascadeType.SAVE_UPDATE}) + private Set citations = new HashSet(); - @XmlElementWrapper(name = "GenBankAccessions") - @XmlElement(name = "GenBankAccession") - @OneToMany(fetch = FetchType.LAZY) - private Set genBankAccession = new HashSet(); +// //should be calculated in case sequence is set +// @XmlElement (name = "DateSequenced", type= String.class) +// @XmlJavaTypeAdapter(DateTimeAdapter.class) +// @Type(type="dateTimeUserType") +// @Basic(fetch = FetchType.LAZY) +// private DateTime dateSequenced; - @XmlElement(name = "Locus") - @XmlIDREF - @XmlSchemaType(name = "IDREF") - @ManyToOne(fetch = FetchType.LAZY) - @Cascade(CascadeType.SAVE_UPDATE) - private Locus locus; - @XmlElementWrapper(name = "Chromatograms") - @XmlElement(name = "Chromatogram") - @XmlIDREF - @XmlSchemaType(name = "IDREF") - @OneToMany(fetch = FetchType.LAZY) - private Set chromatograms = new HashSet(); +//*********************** FACTORY ****************************************************/ - public Locus getLocus(){ - logger.debug("getLocus"); - return this.locus; + public static Sequence NewInstance(String consensusSequence){ + Sequence result = new Sequence(); + result.setSequenceString(consensusSequence); + return result; } - - public void setLocus(Locus locus){ - this.locus = locus; + + + public static Sequence NewInstance(String consensusSequence, Integer length){ + Sequence result = NewInstance(consensusSequence); + result.getConsensusSequence().setLength(length); + return result; } +//*********************** CONSTRUCTOR ****************************************************/ + + protected Sequence() {} - public ReferenceBase getPublishedIn(){ - return this.publishedIn; - } +//*********************** GETTER / SETTER ****************************************************/ - public void setPublishedIn(ReferenceBase publishedIn){ - this.publishedIn = publishedIn; + + /** + * The {@link DnaSample dna sample} this sequencing belongs too. + */ + public DnaSample getDnaSample() { + return dnaSample; } - public Set getCitations() { - return citations; + //TODO bidirectionality?? + /** + * @see #getDnaSample() + */ + private void setDnaSample(DnaSample dnaSample) { + this.dnaSample = dnaSample; } - protected void setCitations(Set citations) { - this.citations = citations; + + /** + * The resulting consensus sequence represened by this {@link Sequence sequence} . + * The consensus is usually computed from the {@link SingleRead single reads}. + * The result of which is stored in a file called {@link #getContigFile() contig file} + * + * #see {@link #getContigFile()} + * #see {@link #getSingleReads()} + */ + public SequenceString getConsensusSequence() { + return consensusSequence; } - public void addCitation(ReferenceBase citation) { - this.citations.add(citation); + + + /** + * @see #getConsensusSequence() + */ + public void setConsensusSequence(SequenceString sequenceString) { + if (sequenceString == null){ + sequenceString = SequenceString.NewInstance(); + } + this.consensusSequence = sequenceString; } - public void removeCitation(ReferenceBase citation) { - this.citations.remove(citation); + + /** + * The isBarcode flag should be set to true if this (consensus) sequence is or includes + * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence + * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}. + * A isBarcode value of null indicates that we do have no knowledge + * whether the sequence is a barcoding sequence or not. + * + * @see #getBarcodeSequencePart() + * @see #getSequenceString() + * @returns the isBarcode flag value (tri-state) + * + */ + public Boolean getIsBarcode() { + return isBarcode; } - public Set getGenBankAccession() { - return genBankAccession; + /** + * @see #getIsBarcode() + * @see #getBarcodeSequencePart() + */ + public void setIsBarcode(Boolean isBarcode) { + this.isBarcode = isBarcode; } - public void addGenBankAccession(GenBankAccession genBankAccession) { - this.genBankAccession.add(genBankAccession); + /** + * If the barcode sequence string does not include 100% of the (consensus) sequence + * the part used as barcode is provided here. However, the barcode part + * should be kept empty if consensus sequence string and barcode sequence string are equal. + * + * @see #getIsBarcode() + */ + public SequenceString getBarcodeSequencePart() { + return barcodeSequencePart; } - - public void removeGenBankAccession(GenBankAccession genBankAccession) { - this.genBankAccession.remove(genBankAccession); + + /** + * @see #getBarcodeSequencePart() + */ + public void setBarcodeSequencePart(SequenceString barcodeSequencePart) { + if (barcodeSequencePart == null){ + barcodeSequencePart = SequenceString.NewInstance(); + } + this.barcodeSequencePart = barcodeSequencePart; } - public Set getChromatograms() { - return chromatograms; + /** + * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing. + * The marker should usually be similar to the one used in the according {@link Amplification + * amplification process}. However, it may slightly differ, or, if multiple amplifications where + * used to build this consensus sequence it may be the super set of the markers used in amplification. + * + * @return + */ + public DefinedTerm getDnaMarker(){ + return this.dnaMarker; + } + + /** + * @see #getDnaMarker() + * @param marker + */ + public void setDnaMarker(DefinedTerm dnaMarker){ + this.dnaMarker = dnaMarker; } - public void addChromatogram(Media chromatogram) { - this.chromatograms.add(chromatogram); + /** + * The accession number used in GenBank, EMBL and DDBJ. + * @return + */ + public String getGeneticAccessionNumber() { + return geneticAccessionNumber; } - - public void removeChromatogram(Media chromatogram) { - this.chromatograms.remove(chromatogram); + + /** + * Sets the genetic accession number. + * @see #getGeneticAccessionNumber() + */ + public void setGeneticAccessionNumber(String geneticAccessionNumber) { + this.geneticAccessionNumber = geneticAccessionNumber; } - public Set getMedia() { - return getChromatograms(); + + /** + * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/). + */ + public String getBoldProcessId() { + return boldProcessId; } - public String getSequence(){ - return this.sequence; + public void setBoldProcessId(String boldProcessId) { + this.boldProcessId = boldProcessId; } /** - * - * @param sequence sequence + * Returns the name of the haplotype. + * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome. + * A certain haplotype may be specific for an individual, a population or a species. + * @return */ - public void setSequence(String sequence){ - this.sequence = sequence; + public String getHaplotype() { + return haplotype; } - public Integer getLength(){ - return this.length; + /** + * @see #getHaplotype() + */ + public void setHaplotype(String haplotype) { + this.haplotype = haplotype; } /** + * The contigFile containing all data and data processing for this sequencing. * - * @param length length + * @see #getConsensusSequence() + * @see #getSingleReads() */ - public void setLength(Integer length){ - this.length = length; + public Media getContigFile() { + return contigFile; } - public Calendar getDateSequenced(){ - return this.dateSequenced; + /** + * @see #getContigFile() + */ + public void setContigFile(Media contigFile) { + this.contigFile = contigFile; + } + + + /** + * Citations are the set of references in which this sequence was published. + * Unlike taxonomic names the first publication of a sequence + * is not so important (maybe because it is required by publishers + * that they are all registered at Genbank) therefore we do not have something like an + * "original reference" attribute.
+ * Links to these references are to be stored within the reference itself. + * @return the set of references in which this sequence was published. + */ + public Set getCitations() { + return citations; + } + /** + * @see #getCitations() + */ + protected void setCitations(Set citations) { + this.citations = citations; + } + /** + * @see #getCitations() + */ + public void addCitation(Reference citation) { + this.citations.add(citation); + } + /** + * @see #getCitations() + */ + public void removeCitation(Reference citation) { + this.citations.remove(citation); } /** + * The {@link SingleRead single reads} that were used to build this consensus sequence. * - * @param dateSequenced dateSequenced + * @see #getConsensusSequence() + * @see #getContigFile() */ - public void setDateSequenced(Calendar dateSequenced){ - this.dateSequenced = dateSequenced; + public Set getSingleReads() { + return singleReads; } + /** + * @see #getSingleReads() + */ + public void addSingleRead(SingleRead singleRead) { + this.singleReads.add(singleRead); + } + /** + * @see #getSingleReads() + */ + public void removeSingleRead(SingleRead singleRead) { + this.singleReads.remove(singleRead); + } + /** + * @see #getSingleReads() + */ + //TODO private as long it is unclear how bidirectionality is handled + private void setSingleReads(Set singleReads) { + this.singleReads = singleReads; + } + + + //*************************** Transient GETTER /SETTER *****************************/ - public boolean isBarcode(){ - return this.barcode; + /** + * Delegate method to get the text representation of the consensus sequence + * @see #setSequenceString(String) + */ + @Transient + public String getSequenceString() { + return consensusSequence.getString(); } /** - * - * @param isBarcode isBarcode + * Delegate method to set the text representation of the {@link #getConsensusSequence() + * consensus sequence}. */ - public void setBarcode(boolean barcode){ - this.barcode = barcode; + @Transient + public void setSequenceString(String sequence) { + consensusSequence.setString(sequence); } + + /** + * Convenience method which computes the set of all related pherograms + * @return the set of pherograms. + */ + @Transient + public Set getPherograms(){ + Set result = new HashSet(); + for (SingleRead singleSeq : singleReads){ + if (singleSeq.getPherogram() != null){ + result.add(singleSeq.getPherogram()); + } + } + return result; + } + - public String getCitationMicroReference(){ - return this.citationMicroReference; + //***** Registrations ************/ + /** + * Returns the computed genBank uri. + * @return + */ + @Transient + public URI getGenBankUri() { + return createExternalUri(GENBANK_BASE_URI); } /** - * - * @param citationMicroReference citationMicroReference + * Returns the computed EMBL uri. + * @return */ - public void setCitationMicroReference(String citationMicroReference){ - this.citationMicroReference = citationMicroReference; + @Transient + public URI getEmblUri() { + return createExternalUri(EMBL_BASE_URI); } - @Override - public String generateTitle(){ - return ""; + /** + * Returns the computed DDBJ uri. + * @return + */ + @Transient + public URI getDdbjUri() { + return createExternalUri(DDBJ_BASE_URI); + } + + /** + * Returns the URI for the BOLD entry. + * @see #getBoldProcessId() + */ + @Transient + public URI getBoldUri() { + return createExternalUri(BOLD_BASE_URI); } - public ReferenceBase getCitation(){ - return publishedIn; + + private URI createExternalUri(String baseUri){ + if (StringUtils.isNotBlank(geneticAccessionNumber)){ + return URI.create(String.format(baseUri, geneticAccessionNumber.trim())); + }else{ + return null; + } + } + + + //*********************** CLONE ********************************************************/ + /** + * Clones this sequence. This is a shortcut that enables to create + * a new instance that differs only slightly from this sequencing by + * modifying only some of the attributes.

+ * + * + * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone() + * @see java.lang.Object#clone() + */ + @Override + public Object clone() { + try{ + Sequence result = (Sequence)super.clone(); + + //sequences + result.consensusSequence = (SequenceString)this.consensusSequence.clone(); + result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone(); + + + //single sequences + result.singleReads = new HashSet(); + for (SingleRead seq: this.singleReads){ + result.singleReads.add((SingleRead) seq); + } + + //citations //TODO do we really want to copy these ?? + result.citations = new HashSet(); + for (Reference ref: this.citations){ + result.citations.add((Reference) ref); + } + + + + return result; + }catch (CloneNotSupportedException e) { + logger.warn("Object does not implement cloneable"); + e.printStackTrace(); + return null; + } } + } \ No newline at end of file