separate Amplification and AmplificationResult.java #4541
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
index 4730edf233a192577072109f625a6dd4d5b1a739..87eef90d3b23bca80a80b4630f08ca2a104e5052 100644 (file)
@@ -1,8 +1,8 @@
 /**
 * Copyright (C) 2007 EDIT
-* European Distributed Institute of Taxonomy 
+* European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
-* 
+*
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
@@ -10,6 +10,7 @@ package eu.etaxonomy.cdm.model.molecular;
 
 
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -17,6 +18,7 @@ import javax.persistence.Entity;
 import javax.persistence.FetchType;
 import javax.persistence.ManyToMany;
 import javax.persistence.ManyToOne;
+import javax.persistence.OneToMany;
 import javax.persistence.Transient;
 import javax.validation.constraints.Size;
 import javax.xml.bind.annotation.XmlAccessType;
@@ -27,6 +29,7 @@ import javax.xml.bind.annotation.XmlElementWrapper;
 import javax.xml.bind.annotation.XmlIDREF;
 import javax.xml.bind.annotation.XmlRootElement;
 import javax.xml.bind.annotation.XmlSchemaType;
+import javax.xml.bind.annotation.XmlTransient;
 import javax.xml.bind.annotation.XmlType;
 
 import org.apache.log4j.Logger;
@@ -44,16 +47,21 @@ import eu.etaxonomy.cdm.model.media.Media;
 import eu.etaxonomy.cdm.model.reference.Reference;
 
 /**
- * Alignment of multiple single sequences to a consensus sequence, 
- * may also include the extracted barcode sequence.
- * 
- * This class holds information about both the combining process of 
+ * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
+ * This sequence is a part of (or the complete) DNA sequence of the related
+ * {@link DnaSample DNA Sample}.
+ *
+ * <BR>This class holds information about both the combining process of
  * {@link SingleRead single sequences} to one consensus sequence
- * (singleReads, contigFile) as well as sequence related information.
- * The later includes the sequence string itself, important genetic information
- * (marker, haplotype) as well as registration information (genetic accession number)
- * citations and barcoding information.
- * 
+ * ({@link #getSingleReads() singleReads}, {@link #getContigFile() contigFile})
+ * as well as sequence related information.
+ * The latter includes the {@link #getConsensusSequence() sequence string} itself,
+ * important genetic information about the DNA that has been sequenced
+ * ({@link #getDnaMarker() marker}, {@link #getHaplotype() haplotype}) as well as
+ * registration information ({@link #getGeneticAccessionNumber() genetic accession number}),
+ * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
+ * {@link #getBarcodeSequencePart() barcode sequence}, ...).
+ *
  * @author m.doering
  * @created 08-Nov-2007 13:06:51
  * @author a.mueller
@@ -70,7 +78,7 @@ import eu.etaxonomy.cdm.model.reference.Reference;
     "boldProcessId",
     "haplotype",
     "contigFile",
-    "singleReads",
+    "singleReadAlignments",
     "citations"
 })
 @XmlRootElement(name = "Sequencing")
@@ -81,43 +89,36 @@ import eu.etaxonomy.cdm.model.reference.Reference;
 public class Sequence extends AnnotatableEntity implements Cloneable{
        private static final long serialVersionUID = 8298983152731241775L;
        private static final Logger logger = Logger.getLogger(Sequence.class);
-       
+
+       //TODO move to cdmlib-ext?
        private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
        private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
        private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
        private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
-       
+
     @XmlElement( name = "DnaSample")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToOne(fetch = FetchType.LAZY)
     @IndexedEmbedded
     private DnaSample dnaSample;
-    
-       
+
+
        /** @see #getContigFile() */
        @XmlElement(name = "ContigFile")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToOne(fetch = FetchType.LAZY)
+       @Cascade({CascadeType.SAVE_UPDATE})
        private Media contigFile;
-    
+
        /** @see #getConsensusSequence() */
        @XmlElement(name = "ConsensusSequence")
     private SequenceString consensusSequence = SequenceString.NewInstance();
-       
-//     /**{@link #getSequence()}*/
-//     @XmlElement(name = "Sequence")
-//    @Lob
-//     private String sequence;
-//     
-//     @XmlElement(name = "Length")
-//     private Integer length;
-    
-       
+
        @XmlAttribute(name = "isBarcode")
        private Boolean isBarcode = null;
-       
+
        /** @see #getBarcodeSequence()*/
        @XmlElement(name = "BarcodeSequencePart")
     private SequenceString barcodeSequencePart = SequenceString.NewInstance();
@@ -126,20 +127,20 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        @XmlElement(name = "GeneticAccessionNumber")
        @Size(max=20)
        private String geneticAccessionNumber;
-    
+
        /** @see #getBoldProcessId() */
        @XmlElement(name = "BoldProcessId")
        @Size(max=20)
        private String boldProcessId;
-       
-    @XmlElementWrapper(name = "SingleReads")
-    @XmlElement(name = "SingleRead")
+
+    @XmlElementWrapper(name = "SingleReadAlignments")
+    @XmlElement(name = "SingleReadAlignment")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
-    @ManyToMany(fetch = FetchType.LAZY)
+    @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY)
     @Cascade({CascadeType.SAVE_UPDATE})
-       private Set<SingleRead> singleReads = new HashSet<SingleRead>();
-    
+       private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
+
        /** @see #getDnaMarker() */
        @XmlElement(name = "DnaMarker")
     @XmlIDREF
@@ -148,12 +149,12 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        //no cascade as it is a defined term
        private DefinedTerm dnaMarker;
 
-       
+
        /** @see #getHaplotype() */
        @XmlElement(name = "Haplotype")
        @Size(max=100)
        private String haplotype;
-       
+
        /** @see #getCitations() */
        @XmlElementWrapper(name = "Citations")
     @XmlElement(name = "Citation")
@@ -162,47 +163,70 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
     @ManyToMany(fetch = FetchType.LAZY)
     @Cascade({CascadeType.SAVE_UPDATE})
        private Set<Reference> citations = new HashSet<Reference>();
-       
+
 //     //should be calculated in case sequence is set
 //     @XmlElement (name = "DateSequenced", type= String.class)
 //     @XmlJavaTypeAdapter(DateTimeAdapter.class)
 //     @Type(type="dateTimeUserType")
 //     @Basic(fetch = FetchType.LAZY)
 //     private DateTime dateSequenced;
-       
-       
+
+
 //*********************** FACTORY ****************************************************/
-       
+
        public static Sequence NewInstance(String consensusSequence){
                Sequence result = new Sequence();
                result.setSequenceString(consensusSequence);
                return result;
        }
-       
+
+
+       public static Sequence NewInstance(String consensusSequence, Integer length){
+               Sequence result = NewInstance(consensusSequence);
+               result.getConsensusSequence().setLength(length);
+               return result;
+       }
+
+       public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
+               Sequence result = NewInstance(consensusSequence);
+               result.getConsensusSequence().setLength(length);
+               dnaSample.addSequence(result);
+
+               return result;
+       }
 //*********************** CONSTRUCTOR ****************************************************/
-       
+
        protected Sequence() {}
 
 //*********************** GETTER / SETTER ****************************************************/
-       
+
 
        /**
-        * The {@link DnaSample dna sample} this sequencing belongs too. 
+        * The {@link DnaSample dna sample} this sequencing belongs to.
         */
        public DnaSample getDnaSample() {
                return dnaSample;
        }
 
-       //TODO bidirectionality??
        /**
+        * To be called only from {@link DnaSample#addSequence(Sequence)}
         * @see #getDnaSample()
         */
-       private void setDnaSample(DnaSample dnaSample) {
+       //TODO implement full bidirectionality
+       protected void setDnaSample(DnaSample dnaSample) {
                this.dnaSample = dnaSample;
+               if (dnaSample != null && !dnaSample.getSequences().contains(this)){
+                       throw new RuntimeException("Don't use DNA setter");
+               }
        }
 
        /**
-        * The consensus sequence achieved by this sequencing.
+        * The resulting consensus sequence represented by this {@link Sequence sequence}.
+        * The consensus is usually computed from the {@link SingleRead single reads}.
+        * The result of which is stored in a file called {@link #getContigFile() contig file}.
+        *
+        * @see #getContigFile()
+        * @see #getSingleReads()
         */
        public SequenceString getConsensusSequence() {
                return consensusSequence;
@@ -218,18 +242,18 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
                }
                this.consensusSequence = sequenceString;
        }
-       
+
        /**
-        * The isBarcode flag should be set to true if this (consensus) sequence is or includes 
-        * a barcode sequence. If the barcode sequence is only a part of the consensus sequence
-        * this part is to be stored as {@link #getBarcodeSequencePart() barcode sequence part}.
+        * The isBarcode flag should be set to true if this (consensus) sequence is or includes
+        * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
+        * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
         * A isBarcode value of <code>null</code> indicates that we do have no knowledge
-        * wether the sequence is a barcoding sequence or not.
-        * 
+        * whether the sequence is a barcoding sequence or not.
+        *
         * @see #getBarcodeSequencePart()
         * @see #getSequenceString()
         * @returns the isBarcode flag value (tri-state)
-        * 
+        *
         */
        public Boolean getIsBarcode() {
                return isBarcode;
@@ -244,10 +268,10 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * If the barcode sequence string does not include 100% of the (consensus) sequence 
+        * If the barcode sequence string does not include 100% of the (consensus) sequence
         * the part used as barcode is provided here. However, the barcode part
-        * should be kept if consensus sequence string and barcode sequence string are equal.
-        * 
+        * should be kept empty if consensus sequence string and barcode sequence string are equal.
+        *
         * @see #getIsBarcode()
         */
        public SequenceString getBarcodeSequencePart() {
@@ -255,17 +279,21 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * @see #getBarcodeSequence()
+        * @see #getBarcodeSequencePart()
         */
-       public void setBarcodeSequence(SequenceString barcodeSequencePart) {
+       public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
                if (barcodeSequencePart == null){
                        barcodeSequencePart = SequenceString.NewInstance();
                }
                this.barcodeSequencePart = barcodeSequencePart;
        }
-       
+
        /**
-        * Sets the {@link TermType#DnaMarker marker} examined and described by this sequencing.
+        * Returns the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
+        * The marker should usually be similar to the one used in the corresponding {@link Amplification
+        * amplification process}. However, it may slightly differ, or, if multiple amplifications were
+        * used to build this consensus sequence, it may be the superset of the markers used in amplification.
+        *
         * @return
         */
        public DefinedTerm getDnaMarker(){
@@ -281,7 +309,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * The accession number used in GenBank, EMBL and DDBJ. 
+        * The accession number used in GenBank, EMBL and DDBJ.
         * @return
         */
        public String getGeneticAccessionNumber() {
@@ -295,7 +323,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setGeneticAccessionNumber(String geneticAccessionNumber) {
                this.geneticAccessionNumber = geneticAccessionNumber;
        }
-       
+
 
        /**
         * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
@@ -327,6 +355,9 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
 
        /**
         * The contigFile containing all data and data processing for this sequencing.
+        *
+        * @see #getConsensusSequence()
+        * @see #getSingleReads()
         */
        public Media getContigFile() {
                return contigFile;
@@ -338,14 +369,14 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setContigFile(Media contigFile) {
                this.contigFile = contigFile;
        }
-       
-       
+
+
        /**
         * Citations are the set of references in which this sequence was published.
         * Unlike taxonomic names the first publication of a sequence
         * is not so important (maybe because it is required by publishers
-        * that they are all registered at Genbank) therefore we do not have something like an 
-        * "original reference" attribute.<BR> 
+        * that they are all registered at Genbank) therefore we do not have something like an
+        * "original reference" attribute.<BR>
         * Links to these references are to be stored within the reference itself.
         * @return the set of references in which this sequence was published.
         */
@@ -372,29 +403,82 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * The single reads that where used to create this consensus sequence.
+        * The {@link SingleReadAlignment alignments} of the {@link SingleRead single reads} that were used to build this consensus sequence.
+        *
+        * @see #getConsensusSequence()
+        * @see #getContigFile()
         */
-       public Set<SingleRead> getSingleReads() {
-               return singleReads;
+       public Set<SingleReadAlignment> getSingleReadAlignments() {
+               return singleReadAlignments;
        }
        /**
         * @see #getSingleReads()
         */
-       public void addSingleRead(SingleRead singleRead) {
-               this.singleReads.add(singleRead);
+       public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+               this.singleReadAlignments.add(singleReadAlignment);
+               if (! this.equals(singleReadAlignment.getConsensusSequence())){
+                       singleReadAlignment.setConsensusAlignment(this);
+               };
        }
        /**
         * @see #getSingleReads()
         */
+       public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+               this.singleReadAlignments.remove(singleReadAlignment);
+               if (this.equals(singleReadAlignment.getConsensusSequence())){
+                       singleReadAlignment.setConsensusAlignment(null);
+               }
+       }
+//     /**
+//      * @see #getSingleReads()
+//      */
+//     //TODO private as long it is unclear how bidirectionality is handled
+//     @SuppressWarnings("unused")
+//     private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
+//             this.singleReadAlignments = singleReadAlignments;
+//     }
+       
+// *********************** CONVENIENCE ***********************************/
+       
+       /**
+        * Convenience method to add a single read to a consensus sequence
+        * by creating a {@link SingleReadAlignment}.
+        * @param singleRead the {@link SingleRead} to add
+        * @return the created SingleReadAlignment
+        */
+       public SingleReadAlignment addSingleRead(SingleRead singleRead) {
+               SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead);
+               return alignment;
+       }
+       
        public void removeSingleRead(SingleRead singleRead) {
-               this.singleReads.remove(singleRead);
+               Set<SingleReadAlignment> toRemove = new HashSet<SingleReadAlignment>();
+               for (SingleReadAlignment align : this.singleReadAlignments){
+                       if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){
+                               toRemove.add(align);
+                       }
+               }
+               for (SingleReadAlignment align : toRemove){
+                       removeSingleReadAlignment(align);
+               }
+               return;
        }
+       
        /**
-        * @see #getSingleReads()
+        * Convenience method that returns all single reads this consensus sequence
+        * is based on via {@link SingleReadAlignment}s.
+        * @return set of related single reads
         */
-       //TODO private as long it is unclear how bidirectionality is handled
-       private void setSingleReads(Set<SingleRead> singleReads) {
-               this.singleReads = singleReads;
+       @XmlTransient
+       @Transient
+       public Set<SingleRead> getSingleReads(){
+               Set<SingleRead> singleReads = new HashSet<SingleRead>();
+               for (SingleReadAlignment align : this.singleReadAlignments){
+                       if (align.getSingleRead() != null){  // == null should not happen
+                               singleReads.add(align.getSingleRead());
+                       }
+               }
+               return singleReads;
        }
 
 
@@ -417,7 +501,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setSequenceString(String sequence) {
                consensusSequence.setString(sequence);
        }
-       
+
        /**
         * Convenience method which computes the set of all related pherograms
         * @return the set of pherograms.
@@ -425,69 +509,75 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        @Transient
        public Set<Media> getPherograms(){
                Set<Media> result = new HashSet<Media>();
-               for (SingleRead singleSeq : singleReads){
-                       if (singleSeq.getPherogram() != null){
-                               result.add(singleSeq.getPherogram());
+               for (SingleReadAlignment singleReadAlign : singleReadAlignments){
+                       if (singleReadAlign.getSingleRead() != null &&  singleReadAlign.getSingleRead().getPherogram() != null){
+                               result.add(singleReadAlign.getSingleRead().getPherogram());
                        }
                }
                return result;
        }
-       
+
 
        //***** Registrations ************/
        /**
         * Returns the computed genBank uri.
-        * @return
+        * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getGenBankUri() {
-               return createExternalUri(GENBANK_BASE_URI);
+       public URI getGenBankUri() throws URISyntaxException {
+               return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
        }
 
        /**
         * Returns the computed EMBL uri.
-        * @return
+        * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getEmblUri() {
-               return createExternalUri(EMBL_BASE_URI);
+       public URI getEmblUri() throws URISyntaxException {
+               return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
        }
 
        /**
         * Returns the computed DDBJ uri.
-        * @return
+        * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getDdbjUri() {
-               return createExternalUri(DDBJ_BASE_URI);
+       public URI getDdbjUri() throws URISyntaxException {
+               return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
        }
-       
+
        /**
         * Returns the URI for the BOLD entry.
+        * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
+        * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
         * @see #getBoldProcessId()
         */
        @Transient
-       public URI getBoldUri() {
-               return createExternalUri(BOLD_BASE_URI);
+       public URI getBoldUri() throws URISyntaxException {
+               return createExternalUri(BOLD_BASE_URI, boldProcessId);
        }
 
-       
-       private URI createExternalUri(String baseUri){
-               if (StringUtils.isNotBlank(geneticAccessionNumber)){
-                       return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
+       private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
+               if (StringUtils.isNotBlank(id)){
+                       return new URI(String.format(baseUri, id.trim()));
                }else{
                        return null;
                }
        }
-       
-       
+
+
+
+
        //*********************** CLONE ********************************************************/
-       /** 
+       /**
         * Clones <i>this</i> sequence. This is a shortcut that enables to create
         * a new instance that differs only slightly from <i>this</i> sequencing by
         * modifying only some of the attributes.<BR><BR>
-        * 
-        *  
+        *
+        *
         * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
         * @see java.lang.Object#clone()
         */
@@ -495,26 +585,27 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public Object clone()  {
                try{
                Sequence result = (Sequence)super.clone();
-               
+
                //sequences
                result.consensusSequence = (SequenceString)this.consensusSequence.clone();
                result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
-               
-               
+
+
                //single sequences
-               result.singleReads = new HashSet<SingleRead>();
-               for (SingleRead seq: this.singleReads){
-                       result.singleReads.add((SingleRead) seq);
+               result.singleReadAlignments = new HashSet<SingleReadAlignment>();
+               for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
+                       SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
+                       result.singleReadAlignments.add(newAlignment);
                }
-               
+
                //citations  //TODO do we really want to copy these ??
                result.citations = new HashSet<Reference>();
-               for (Reference ref: this.citations){
-                       result.citations.add((Reference) ref);
+               for (Reference<?> ref: this.citations){
+                       result.citations.add(ref);
                }
-               
-               
-               
+
+
+
                return result;
                }catch (CloneNotSupportedException e) {
                        logger.warn("Object does not implement cloneable");