merge trunk to cdm-3.3 branch
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
index 1f912662fd6e9045b91e47a7645ab64127845be9..cca82875c7c422685bf9a1e796ddd8255205fce5 100644 (file)
@@ -6,19 +6,19 @@
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
-
 package eu.etaxonomy.cdm.model.molecular;
 
 
+import java.net.URI;
 import java.util.HashSet;
 import java.util.Set;
 
-import javax.persistence.Basic;
 import javax.persistence.Entity;
 import javax.persistence.FetchType;
+import javax.persistence.ManyToMany;
 import javax.persistence.ManyToOne;
-import javax.persistence.OneToMany;
 import javax.persistence.Transient;
+import javax.validation.constraints.Size;
 import javax.xml.bind.annotation.XmlAccessType;
 import javax.xml.bind.annotation.XmlAccessorType;
 import javax.xml.bind.annotation.XmlAttribute;
@@ -28,254 +28,463 @@ import javax.xml.bind.annotation.XmlIDREF;
 import javax.xml.bind.annotation.XmlRootElement;
 import javax.xml.bind.annotation.XmlSchemaType;
 import javax.xml.bind.annotation.XmlType;
-import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
 
 import org.apache.log4j.Logger;
+import org.codehaus.plexus.util.StringUtils;
 import org.hibernate.annotations.Cascade;
 import org.hibernate.annotations.CascadeType;
-import org.hibernate.annotations.Index;
-import org.hibernate.annotations.Table;
-import org.hibernate.annotations.Type;
 import org.hibernate.envers.Audited;
-import org.joda.time.DateTime;
+import org.hibernate.search.annotations.IndexedEmbedded;
 import org.springframework.beans.factory.annotation.Configurable;
 
-import eu.etaxonomy.cdm.jaxb.DateTimeAdapter;
-import eu.etaxonomy.cdm.model.common.IReferencedEntity;
-import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
-import eu.etaxonomy.cdm.model.media.IMediaDocumented;
+import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
+import eu.etaxonomy.cdm.model.common.DefinedTerm;
+import eu.etaxonomy.cdm.model.common.TermType;
 import eu.etaxonomy.cdm.model.media.Media;
 import eu.etaxonomy.cdm.model.reference.Reference;
-import eu.etaxonomy.cdm.strategy.cache.common.IIdentifiableEntityCacheStrategy;
-import eu.etaxonomy.cdm.strategy.cache.common.IdentifiableEntityDefaultCacheStrategy;
 
 /**
+ * Alignment of multiple single sequences to a consensus sequence, 
+ * may also include the extracted barcode sequence.
+ * 
+ * This class holds information about both the combining process of 
+ * {@link SingleSequence single sequences} to one consensus sequence
+ * (singleSequences, contigFile) as well as sequence related information.
+ * The latter includes the sequence string itself, important genetic information
+ * (marker, haplotype) as well as registration information (genetic accession number)
+ * citations and barcoding information.
+ * 
  * @author m.doering
- * @version 1.0
  * @created 08-Nov-2007 13:06:51
+ * @author a.mueller
+ * @updated 11-Jul-2013
  */
 @XmlAccessorType(XmlAccessType.FIELD)
 @XmlType(name = "Sequence", propOrder = {
-    "sequence",
-    "length",
-    "dateSequenced",
-    "barcode",
-    "citationMicroReference",
-    "publishedIn",
-    "locus",
-    "citations",
-    "genBankAccession",
-    "chromatograms"
+    "dnaSample",
+       "consensusSequence",
+       "isBarcode",
+    "barcodeSequencePart",
+    "marker",
+    "geneticAccessionNumber",
+    "boldProcessId",
+    "haplotype",
+    "contigFile",
+    "singleSequences",
+    "citations"
 })
-@XmlRootElement(name = "Sequence")
+@XmlRootElement(name = "Sequencing")
 @Entity
 @Audited
 @Configurable
-@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
-public class Sequence extends IdentifiableEntity<IIdentifiableEntityCacheStrategy<Sequence>> implements IReferencedEntity, IMediaDocumented{
+//@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
+public class Sequence extends AnnotatableEntity implements Cloneable{
        private static final long serialVersionUID = 8298983152731241775L;
        private static final Logger logger = Logger.getLogger(Sequence.class);
        
-       //the sequence as a string of base pairs. 5'->3'
-       @XmlElement(name = "Sequence")
-       private String sequence;
+       private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
+       private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
+       private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
+       private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
+       
+    @XmlElement( name = "DnaSample")
+    @XmlIDREF
+    @XmlSchemaType(name = "IDREF")
+    @ManyToOne(fetch = FetchType.LAZY)
+    @IndexedEmbedded
+    private DnaSample dnaSample;
+    
        
-       //should be calculated in case sequence is set
-       @XmlElement(name = "Length")
-       private Integer length;
+       /** @see #getContigFile() */
+       @XmlElement(name = "ContigFile")
+    @XmlIDREF
+    @XmlSchemaType(name = "IDREF")
+    @ManyToOne(fetch = FetchType.LAZY)
+       private Media contigFile;
+    
+       /** @see #getConsensusSequence() */
+       @XmlElement(name = "ConsensusSequence")
+    private SequenceString consensusSequence = SequenceString.NewInstance();
        
-       //should be calculated in case sequence is set
-       @XmlElement (name = "DateSequenced", type= String.class)
-       @XmlJavaTypeAdapter(DateTimeAdapter.class)
-       @Type(type="dateTimeUserType")
-       @Basic(fetch = FetchType.LAZY)
-       private DateTime dateSequenced;
+//     /**{@link #getSequence()}*/
+//     @XmlElement(name = "Sequence")
+//    @Lob
+//     private String sequence;
+//     
+//     @XmlElement(name = "Length")
+//     private Integer length;
+    
        
-       //should be calculated in case sequence is set
        @XmlAttribute(name = "isBarcode")
-       private boolean barcode;
+       private Boolean isBarcode = null;
        
-       //the sequence as a string of base pairs. 5'->3'
-       @XmlElement(name = "CitationMicroReference")
-       private String citationMicroReference;
+       /** @see #getBarcodeSequencePart()*/
+       @XmlElement(name = "BarcodeSequencePart")
+    private SequenceString barcodeSequencePart = SequenceString.NewInstance();
+
+       /** @see #getGeneticAccessionNumber()*/
+       @XmlElement(name = "GeneticAccessionNumber")
+       @Size(max=20)
+       private String geneticAccessionNumber;
+    
+       /** @see #getBoldProcessId() */
+       @XmlElement(name = "BoldProcessId")
+       @Size(max=20)
+       private String boldProcessId;
        
-       @XmlElement(name = "PublishedIn")
+    @XmlElementWrapper(name = "SingleSequences")
+    @XmlElement(name = "SingleSequence")
+    @XmlIDREF
+    @XmlSchemaType(name = "IDREF")
+    @ManyToMany(fetch = FetchType.LAZY)
+    @Cascade({CascadeType.SAVE_UPDATE})
+       private Set<SingleSequence> singleSequences = new HashSet<SingleSequence>();
+    
+       /** @see #getMarker() */
+       @XmlElement(name = "Marker")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToOne(fetch = FetchType.LAZY)
-    @Cascade(CascadeType.SAVE_UPDATE)
-       private Reference<?> publishedIn;
+       //no cascade as it is a defined term
+       private DefinedTerm marker;
+
+       
+       /** @see #getHaplotype() */
+       @XmlElement(name = "Haplotype")
+       @Size(max=100)
+       private String haplotype;
        
+       /** @see #getCitations() */
        @XmlElementWrapper(name = "Citations")
-       @XmlElement(name = "Citation")
+    @XmlElement(name = "Citation")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
-    @OneToMany(fetch = FetchType.LAZY)
+    @ManyToMany(fetch = FetchType.LAZY)
+    @Cascade({CascadeType.SAVE_UPDATE})
        private Set<Reference> citations = new HashSet<Reference>();
        
-       @XmlElementWrapper(name = "GenBankAccessions")
-       @XmlElement(name = "GenBankAccession")
-    @OneToMany(fetch = FetchType.LAZY)
-       @Cascade(CascadeType.SAVE_UPDATE)
-    private Set<GenBankAccession> genBankAccession = new HashSet<GenBankAccession>();
+//     //should be calculated in case sequence is set
+//     @XmlElement (name = "DateSequenced", type= String.class)
+//     @XmlJavaTypeAdapter(DateTimeAdapter.class)
+//     @Type(type="dateTimeUserType")
+//     @Basic(fetch = FetchType.LAZY)
+//     private DateTime dateSequenced;
        
-       @XmlElement(name = "Locus")
-    @XmlIDREF
-    @XmlSchemaType(name = "IDREF")
-    @ManyToOne(fetch = FetchType.LAZY)
-    @Cascade(CascadeType.SAVE_UPDATE)
-       private Locus locus;
-       
-       @XmlElementWrapper(name = "Chromatograms")
-       @XmlElement(name = "Chromatogram")
-    @XmlIDREF
-    @XmlSchemaType(name = "IDREF")
-    @OneToMany(fetch = FetchType.LAZY)
-       private Set<Media> chromatograms = new HashSet<Media>();
        
 //*********************** FACTORY ****************************************************/
        
-       public static Sequence NewInstance(String sequence){
+       public static Sequence NewInstance(String consensusSequence){
                Sequence result = new Sequence();
-               result.setSequence(sequence);
+               result.setSequenceString(consensusSequence);
                return result;
        }
        
 //*********************** CONSTRUCTOR ****************************************************/
        
-       protected Sequence() {
-               super(); // FIXME I think this is explicit - do we really need to call this?
-               this.cacheStrategy = new IdentifiableEntityDefaultCacheStrategy<Sequence>();
-       }
+       protected Sequence() {}
 
 //*********************** GETTER / SETTER ****************************************************/
        
-       public Locus getLocus(){
-               return this.locus;
-       }
 
-       public void setLocus(Locus locus){
-               this.locus = locus;
+       /**
+        * The {@link DnaSample dna sample} this sequencing belongs to.
+        */
+       public DnaSample getDnaSample() {
+               return dnaSample;
        }
 
-       public Reference getPublishedIn(){
-               return this.publishedIn;
-       }
-       
-       public void setPublishedIn(Reference publishedIn){
-               this.publishedIn = publishedIn;
+       //TODO bidirectionality??
+       /**
+        * @see #getDnaSample()
+        */
+       private void setDnaSample(DnaSample dnaSample) {
+               this.dnaSample = dnaSample;
        }
 
-       public Set<Reference> getCitations() {
-               return citations;
+       /**
+        * The consensus sequence achieved by this sequencing.
+        */
+       public SequenceString getConsensusSequence() {
+               return consensusSequence;
        }
-       protected void setCitations(Set<Reference> citations) {
-               this.citations = citations;
+
+
+       /**
+        * @see #getConsensusSequence()
+        */
+       public void setConsensusSequence(SequenceString sequenceString) {
+               if (sequenceString == null){
+                       sequenceString = SequenceString.NewInstance();
+               }
+               this.consensusSequence = sequenceString;
        }
-       public void addCitation(Reference citation) {
-               this.citations.add(citation);
+       
+       /**
+        * The isBarcode flag should be set to true if this (consensus) sequence is or includes 
+        * a barcode sequence. If the barcode sequence is only a part of the consensus sequence
+        * this part is to be stored as {@link #getBarcodeSequencePart() barcode sequence part}.
+        * An isBarcode value of <code>null</code> indicates that we have no knowledge
+        * whether the sequence is a barcoding sequence or not.
+        * 
+        * @see #getBarcodeSequencePart()
+        * @see #getSequenceString()
+        * @return the isBarcode flag value (tri-state)
+        * 
+        */
+       public Boolean getIsBarcode() {
+               return isBarcode;
        }
-       public void removeCitation(Reference citation) {
-               this.citations.remove(citation);
+
+       /**
+        * @see #getIsBarcode()
+        * @see #getBarcodeSequencePart()
+        */
+       public void setIsBarcode(Boolean isBarcode) {
+               this.isBarcode = isBarcode;
        }
 
-       public Set<GenBankAccession> getGenBankAccession() {
-               return genBankAccession;
+       /**
+        * If the barcode sequence string does not include 100% of the (consensus) sequence 
+        * the part used as barcode is provided here. However, the barcode part
+        * should be kept empty if consensus sequence string and barcode sequence string are equal.
+        * 
+        * @see #getIsBarcode()
+        */
+       public SequenceString getBarcodeSequencePart() {
+               return barcodeSequencePart;
        }
 
-       public void addGenBankAccession(GenBankAccession genBankAccession) {
-               this.genBankAccession.add(genBankAccession);
+       /**
+        * @see #getBarcodeSequencePart()
+        */
+       public void setBarcodeSequence(SequenceString barcodeSequencePart) {
+               if (barcodeSequencePart == null){
+                       barcodeSequencePart = SequenceString.NewInstance();
+               }
+               this.barcodeSequencePart = barcodeSequencePart;
        }
        
-       public void removeGenBankAccession(GenBankAccession genBankAccession) {
-               this.genBankAccession.remove(genBankAccession);
+       /**
+        * Returns the {@link TermType#DnaMarker marker} examined and described by this sequencing.
+        * @return
+        */
+       public DefinedTerm getMarker(){
+               return this.marker;
        }
-       
-       public Set<Media> getChromatograms() {
-               return chromatograms;
+
+       /**
+        * @see #getMarker()
+        * @param marker
+        */
+       public void setMarker(DefinedTerm marker){
+               this.marker = marker;
        }
 
-       public void addChromatogram(Media chromatogram) {
-               this.chromatograms.add(chromatogram);
+       /**
+        * The accession number used in GenBank, EMBL and DDBJ. 
+        * @return
+        */
+       public String getGeneticAccessionNumber() {
+               return geneticAccessionNumber;
        }
-       
-       public void removeChromatogram(Media chromatogram) {
-               this.chromatograms.remove(chromatogram);
+
+       /**
+        * Sets the genetic accession number.
+        * @see #getGeneticAccessionNumber()
+        */
+       public void setGeneticAccessionNumber(String geneticAccessionNumber) {
+               this.geneticAccessionNumber = geneticAccessionNumber;
        }
        
-       @Transient
-       public Set<Media> getMedia() {
-               return getChromatograms();
+
+       /**
+        * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
+        */
+       public String getBoldProcessId() {
+               return boldProcessId;
        }
 
-       public String getSequence(){
-               return this.sequence;
+       public void setBoldProcessId(String boldProcessId) {
+               this.boldProcessId = boldProcessId;
        }
 
        /**
-        * 
-        * @param sequence    sequence
+        * Returns the name of the haplotype.
+        * A haplotype (haploid genotype) is a variant of nucleotide sequences on the same chromosome.
+        * A certain haplotype may be specific for an individual, a population or a species.
+        * @return
         */
-       public void setSequence(String sequence){
-               this.sequence = sequence;
+       public String getHaplotype() {
+               return haplotype;
        }
 
-       public Integer getLength(){
-               return this.length;
+       /**
+        * @see #getHaplotype()
+        */
+       public void setHaplotype(String haplotype) {
+               this.haplotype = haplotype;
        }
 
        /**
-        * 
-        * @param length    length
+        * The contigFile containing all data and data processing for this sequencing.
         */
-       public void setLength(Integer length){
-               this.length = length;
+       public Media getContigFile() {
+               return contigFile;
        }
 
-       public DateTime getDateSequenced(){
-               return this.dateSequenced;
+       /**
+        * @see #getContigFile()
+        */
+       public void setContigFile(Media contigFile) {
+               this.contigFile = contigFile;
+       }
+       
+       
+       /**
+        * Citations are the set of references in which this sequence was published.
+        * Unlike taxonomic names the first publication of a sequence
+        * is not so important (maybe because it is required by publishers
+        * that they are all registered at Genbank) therefore we do not have something like an 
+        * "original reference" attribute.<BR> 
+        * Links to these references are to be stored within the reference itself.
+        * @return the set of references in which this sequence was published.
+        */
+       public Set<Reference> getCitations() {
+               return citations;
+       }
+       /**
+        * @see #getCitations()
+        */
+       protected void setCitations(Set<Reference> citations) {
+               this.citations = citations;
+       }
+       /**
+        * @see #getCitations()
+        */
+       public void addCitation(Reference citation) {
+               this.citations.add(citation);
+       }
+       /**
+        * @see #getCitations()
+        */
+       public void removeCitation(Reference citation) {
+               this.citations.remove(citation);
        }
 
        /**
-        * 
-        * @param dateSequenced    dateSequenced
+        * The single sequences that were used to create this consensus sequence.
+        */
+       public Set<SingleSequence> getSingleSequences() {
+               return singleSequences;
+       }
+       /**
+        * @see #getSingleSequences()
+        */
+       public void addSingleSquence(SingleSequence singleSequence) {
+               this.singleSequences.add(singleSequence);
+       }
+       /**
+        * @see #getSingleSequences()
+        */
+       public void removeSingleSquence(SingleSequence singleSequence) {
+               this.singleSequences.remove(singleSequence);
+       }
+       /**
+        * @see #getSingleSequences()
+        */
+       //TODO private as long it is unclear how bidirectionality is handled
+       private void setSingleSequences(Set<SingleSequence> singleSequences) {
+               this.singleSequences = singleSequences;
+       }
+
+
+       //*************************** Transient GETTER /SETTER *****************************/
+
+       /**
+        * Delegate method to get the text representation of the consensus sequence
+        * @see #setSequenceString(String)
         */
-       public void setDateSequenced(DateTime dateSequenced){
-               this.dateSequenced = dateSequenced;
+       @Transient
+       public String getSequenceString() {
+               return consensusSequence.getString();
        }
 
-       public boolean isBarcode(){
-               return this.barcode;
+       /**
+        * Delegate method to set the text representation of the {@link #getConsensusSequence()
+        * consensus sequence}.
+        */
+       @Transient
+       public void setSequenceString(String sequence) {
+               consensusSequence.setString(sequence);
        }
+       
+       /**
+        * Convenience method which computes the set of all related pherograms
+        * @return the set of pherograms.
+        */
+       @Transient
+       public Set<Media> getPherograms(){
+               Set<Media> result = new HashSet<Media>();
+               for (SingleSequence singleSeq : singleSequences){
+                       if (singleSeq.getPherogram() != null){
+                               result.add(singleSeq.getPherogram());
+                       }
+               }
+               return result;
+       }
+       
 
+       //***** Registrations ************/
        /**
-        * 
-        * @param isBarcode    isBarcode
+        * Returns the computed genBank uri.
+        * @return
         */
-       public void setBarcode(boolean barcode){
-               this.barcode = barcode;
+       @Transient
+       public URI getGenBankUri() {
+               return createExternalUri(GENBANK_BASE_URI);
        }
 
-       public String getCitationMicroReference(){
-               return this.citationMicroReference;
+       /**
+        * Returns the computed EMBL uri.
+        * @return
+        */
+       @Transient
+       public URI getEmblUri() {
+               return createExternalUri(EMBL_BASE_URI);
        }
 
        /**
-        * 
-        * @param citationMicroReference    citationMicroReference
+        * Returns the computed DDBJ uri.
+        * @return
+        */
+       @Transient
+       public URI getDdbjUri() {
+               return createExternalUri(DDBJ_BASE_URI);
+       }
+       
+       /**
+        * Returns the BOLD record URI, built from the {@link #getBoldProcessId() BOLD process id} (not the genetic accession number).
+        * @return the URI, or <code>null</code> if the BOLD process id is blank
         */
-       public void setCitationMicroReference(String citationMicroReference){
-               this.citationMicroReference = citationMicroReference;
+       @Transient
+       public URI getBoldUri() {
+               return StringUtils.isNotBlank(boldProcessId) ? URI.create(String.format(BOLD_BASE_URI, boldProcessId.trim())) : null;
        }
 
-       public Reference getCitation(){
-               return publishedIn;
+       
+       private URI createExternalUri(String baseUri){
+               if (StringUtils.isNotBlank(geneticAccessionNumber)){
+                       return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
+               }else{
+                       return null;
+               }
        }
        
+       
        //*********************** CLONE ********************************************************/
        /** 
         * Clones <i>this</i> sequence. This is a shortcut that enables to create
-        * a new instance that differs only slightly from <i>this</i> sequence by
+        * a new instance that differs only slightly from <i>this</i> sequencing by
         * modifying only some of the attributes.<BR><BR>
         * 
         *  
@@ -287,22 +496,24 @@ public class Sequence extends IdentifiableEntity<IIdentifiableEntityCacheStrateg
                try{
                Sequence result = (Sequence)super.clone();
                
-               result.citations = new HashSet<Reference>();
-               for (Reference ref: this.citations){
-                       result.citations.add((Reference) ref.clone());
+               //sequences
+               result.consensusSequence = (SequenceString)this.consensusSequence.clone();
+               result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
                
+               
+               //single sequences
+               result.singleSequences = new HashSet<SingleSequence>();
+               for (SingleSequence seq: this.singleSequences){
+                       result.singleSequences.add((SingleSequence) seq);
                }
                
-               result.genBankAccession = new HashSet<GenBankAccession>();
-               for (GenBankAccession genBankAcc: this.genBankAccession){
-                       result.genBankAccession.add((GenBankAccession)genBankAcc.clone());
+               //citations  //TODO do we really want to copy these ??
+               result.citations = new HashSet<Reference>();
+               for (Reference ref: this.citations){
+                       result.citations.add((Reference) ref);
                }
                
-               result.chromatograms = new HashSet<Media>();
                
-               for (Media chromatogram: this.chromatograms){
-                       result.chromatograms.add((Media)chromatogram.clone());
-               }
                
                return result;
                }catch (CloneNotSupportedException e) {
@@ -311,4 +522,6 @@ public class Sequence extends IdentifiableEntity<IIdentifiableEntityCacheStrateg
                        return null;
                }
        }
+
+
 }
\ No newline at end of file