Merge branch 'hotfix/3.12.4' into develop
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
index 71827e0babe12a3e47d311d59bddb6c2425a6a44..6bc65a7d576c0ff16d071e9af8a44378b882ef95 100644 (file)
@@ -1,8 +1,8 @@
 /**
 * Copyright (C) 2007 EDIT
-* European Distributed Institute of Taxonomy 
+* European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
-* 
+*
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
@@ -10,15 +10,17 @@ package eu.etaxonomy.cdm.model.molecular;
 
 
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.HashSet;
 import java.util.Set;
 
+import javax.persistence.Column;
 import javax.persistence.Entity;
 import javax.persistence.FetchType;
 import javax.persistence.ManyToMany;
 import javax.persistence.ManyToOne;
+import javax.persistence.OneToMany;
 import javax.persistence.Transient;
-import javax.validation.constraints.Size;
 import javax.xml.bind.annotation.XmlAccessType;
 import javax.xml.bind.annotation.XmlAccessorType;
 import javax.xml.bind.annotation.XmlAttribute;
@@ -27,6 +29,7 @@ import javax.xml.bind.annotation.XmlElementWrapper;
 import javax.xml.bind.annotation.XmlIDREF;
 import javax.xml.bind.annotation.XmlRootElement;
 import javax.xml.bind.annotation.XmlSchemaType;
+import javax.xml.bind.annotation.XmlTransient;
 import javax.xml.bind.annotation.XmlType;
 
 import org.apache.log4j.Logger;
@@ -46,19 +49,19 @@ import eu.etaxonomy.cdm.model.reference.Reference;
 /**
  * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
  * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample},
- * while 
- * 
- * <BR>This class holds information about both the combining process of 
+ * while
+ *
+ * <BR>This class holds information about both the combining process of
  * {@link SingleRead single sequences} to one consensus sequence
- * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} ) 
+ * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
  * as well as sequence related information.
- * The later includes the {@link #getConsensusSequence() sequence string} itself, 
- * important genetic information about the DNA that has been sequenced 
- * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as 
+ * The latter includes the {@link #getConsensusSequence() sequence string} itself,
+ * important genetic information about the DNA that has been sequenced
+ * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as
  * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
- * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id}, 
+ * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
  * {@link #getBarcodeSequencePart() barcode sequence}, ...).
- * 
+ *
  * @author m.doering
  * @created 08-Nov-2007 13:06:51
  * @author a.mueller
@@ -75,7 +78,7 @@ import eu.etaxonomy.cdm.model.reference.Reference;
     "boldProcessId",
     "haplotype",
     "contigFile",
-    "singleReads",
+    "singleReadAlignments",
     "citations"
 })
 @XmlRootElement(name = "Sequencing")
@@ -86,57 +89,58 @@ import eu.etaxonomy.cdm.model.reference.Reference;
 public class Sequence extends AnnotatableEntity implements Cloneable{
        private static final long serialVersionUID = 8298983152731241775L;
        private static final Logger logger = Logger.getLogger(Sequence.class);
-       
+
        //TODO move to cdmlib-ext?
        private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
        private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
        private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
        private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
-       
+
     @XmlElement( name = "DnaSample")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToOne(fetch = FetchType.LAZY)
     @IndexedEmbedded
     private DnaSample dnaSample;
-    
-       
+
+
        /** @see #getContigFile() */
        @XmlElement(name = "ContigFile")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToOne(fetch = FetchType.LAZY)
+       @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
        private Media contigFile;
-    
+
        /** @see #getConsensusSequence() */
        @XmlElement(name = "ConsensusSequence")
     private SequenceString consensusSequence = SequenceString.NewInstance();
-       
+
        @XmlAttribute(name = "isBarcode")
        private Boolean isBarcode = null;
-       
+
        /** @see #getBarcodeSequence()*/
        @XmlElement(name = "BarcodeSequencePart")
     private SequenceString barcodeSequencePart = SequenceString.NewInstance();
 
        /** @see #getGeneticAccessionNumber()*/
        @XmlElement(name = "GeneticAccessionNumber")
-       @Size(max=20)
+    @Column(length=20)
        private String geneticAccessionNumber;
-    
+
        /** @see #getBoldProcessId() */
        @XmlElement(name = "BoldProcessId")
-       @Size(max=20)
+    @Column(length=20)
        private String boldProcessId;
-       
-    @XmlElementWrapper(name = "SingleReads")
-    @XmlElement(name = "SingleRead")
+
+    @XmlElementWrapper(name = "SingleReadAlignments")
+    @XmlElement(name = "SingleReadAlignment")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
-    @ManyToMany(fetch = FetchType.LAZY)
-    @Cascade({CascadeType.SAVE_UPDATE})
-       private Set<SingleRead> singleReads = new HashSet<SingleRead>();
-    
+    @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY, orphanRemoval=true)
+    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
+       private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
+
        /** @see #getDnaMarker() */
        @XmlElement(name = "DnaMarker")
     @XmlIDREF
@@ -145,70 +149,82 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        //no cascade as it is a defined term
        private DefinedTerm dnaMarker;
 
-       
+
        /** @see #getHaplotype() */
        @XmlElement(name = "Haplotype")
-       @Size(max=100)
+    @Column(length=100)
        private String haplotype;
-       
+
        /** @see #getCitations() */
        @XmlElementWrapper(name = "Citations")
     @XmlElement(name = "Citation")
     @XmlIDREF
     @XmlSchemaType(name = "IDREF")
     @ManyToMany(fetch = FetchType.LAZY)
-    @Cascade({CascadeType.SAVE_UPDATE})
+    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
        private Set<Reference> citations = new HashSet<Reference>();
-       
+
 //     //should be calculated in case sequence is set
 //     @XmlElement (name = "DateSequenced", type= String.class)
 //     @XmlJavaTypeAdapter(DateTimeAdapter.class)
 //     @Type(type="dateTimeUserType")
 //     @Basic(fetch = FetchType.LAZY)
 //     private DateTime dateSequenced;
-       
-       
+
+
 //*********************** FACTORY ****************************************************/
-       
+
        public static Sequence NewInstance(String consensusSequence){
                Sequence result = new Sequence();
                result.setSequenceString(consensusSequence);
                return result;
        }
-       
-       
+
+
        public static Sequence NewInstance(String consensusSequence, Integer length){
                Sequence result = NewInstance(consensusSequence);
                result.getConsensusSequence().setLength(length);
                return result;
        }
+
+       public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
+               Sequence result = NewInstance(consensusSequence);
+               result.getConsensusSequence().setLength(length);
+               dnaSample.addSequence(result);
+
+               return result;
+       }
 //*********************** CONSTRUCTOR ****************************************************/
-       
+
        protected Sequence() {}
 
 //*********************** GETTER / SETTER ****************************************************/
-       
+
 
        /**
-        * The {@link DnaSample dna sample} this sequencing belongs too. 
+        * The {@link DnaSample dna sample} this sequencing belongs to.
         */
        public DnaSample getDnaSample() {
                return dnaSample;
        }
 
-       //TODO bidirectionality??
        /**
+        * To be called only from {@link DnaSample#addSequence(Sequence)}
         * @see #getDnaSample()
         */
-       private void setDnaSample(DnaSample dnaSample) {
+       //TODO implement full bidirectionality
+       protected void setDnaSample(DnaSample dnaSample) {
                this.dnaSample = dnaSample;
+               if (dnaSample != null && !dnaSample.getSequences().contains(this)){
+                       throw new RuntimeException("Don't use DNA setter");
+               }
        }
 
        /**
         * The resulting consensus sequence represened by this {@link Sequence sequence} .
         * The consensus is usually computed from the {@link SingleRead single reads}.
         * The result of which is stored in a file called {@link #getContigFile() contig file}
-        * 
+        *
         * #see {@link #getContigFile()}
         * #see {@link #getSingleReads()}
         */
@@ -226,18 +242,18 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
                }
                this.consensusSequence = sequenceString;
        }
-       
+
        /**
-        * The isBarcode flag should be set to true if this (consensus) sequence is or includes 
+        * The isBarcode flag should be set to true if this (consensus) sequence is or includes
         * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
         * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
         * A isBarcode value of <code>null</code> indicates that we do have no knowledge
         * whether the sequence is a barcoding sequence or not.
-        * 
+        *
         * @see #getBarcodeSequencePart()
         * @see #getSequenceString()
         * @returns the isBarcode flag value (tri-state)
-        * 
+        *
         */
        public Boolean getIsBarcode() {
                return isBarcode;
@@ -252,10 +268,10 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * If the barcode sequence string does not include 100% of the (consensus) sequence 
+        * If the barcode sequence string does not include 100% of the (consensus) sequence
         * the part used as barcode is provided here. However, the barcode part
         * should be kept empty if consensus sequence string and barcode sequence string are equal.
-        * 
+        *
         * @see #getIsBarcode()
         */
        public SequenceString getBarcodeSequencePart() {
@@ -263,21 +279,21 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        }
 
        /**
-        * @see #getBarcodeSequence()
+        * @see #getBarcodeSequencePart()
         */
-       public void setBarcodeSequence(SequenceString barcodeSequencePart) {
+       public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
                if (barcodeSequencePart == null){
                        barcodeSequencePart = SequenceString.NewInstance();
                }
                this.barcodeSequencePart = barcodeSequencePart;
        }
-       
+
        /**
         * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
         * The marker should usually be similar to the one used in the according {@link Amplification
         * amplification process}. However, it may slightly differ, or, if multiple amplifications where
         * used to build this consensus sequence it may be the super set of the markers used in amplification.
-        * 
+        *
         * @return
         */
        public DefinedTerm getDnaMarker(){
@@ -307,7 +323,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setGeneticAccessionNumber(String geneticAccessionNumber) {
                this.geneticAccessionNumber = geneticAccessionNumber;
        }
-       
+
 
        /**
         * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
@@ -339,7 +355,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
 
        /**
         * The contigFile containing all data and data processing for this sequencing.
-        * 
+        *
         * @see #getConsensusSequence()
         * @see #getSingleReads()
         */
@@ -353,14 +369,14 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setContigFile(Media contigFile) {
                this.contigFile = contigFile;
        }
-       
-       
+
+
        /**
         * Citations are the set of references in which this sequence was published.
         * Unlike taxonomic names the first publication of a sequence
         * is not so important (maybe because it is required by publishers
-        * that they are all registered at Genbank) therefore we do not have something like an 
-        * "original reference" attribute.<BR> 
+        * that they are all registered at Genbank) therefore we do not have something like an
+        * "original reference" attribute.<BR>
         * Links to these references are to be stored within the reference itself.
         * @return the set of references in which this sequence was published.
         */
@@ -388,31 +404,82 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
 
        /**
         * The {@link SingleRead single reads} that were used to build this consensus sequence.
-        * 
+        *
         * @see #getConsensusSequence()
         * @see #getContigFile()
         */
-       public Set<SingleRead> getSingleReads() {
-               return singleReads;
+       public Set<SingleReadAlignment> getSingleReadAlignments() {
+               return singleReadAlignments;
        }
        /**
         * @see #getSingleReads()
         */
-       public void addSingleRead(SingleRead singleRead) {
-               this.singleReads.add(singleRead);
+       public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+               this.singleReadAlignments.add(singleReadAlignment);
+               if (! this.equals(singleReadAlignment.getConsensusSequence())){
+                       singleReadAlignment.setConsensusAlignment(this);
+               };
        }
        /**
         * @see #getSingleReads()
         */
+       public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+               this.singleReadAlignments.remove(singleReadAlignment);
+               if (this.equals(singleReadAlignment.getConsensusSequence())){
+                       singleReadAlignment.setConsensusAlignment(null);
+                       singleReadAlignment.setSingleRead(null);
+               }
+       }
+//     /**
+//      * @see #getSingleReads()
+//      */
+//     //TODO private as long it is unclear how bidirectionality is handled
+//     @SuppressWarnings("unused")
+//     private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
+//             this.singleReadAlignments = singleReadAlignments;
+//     }
+
+// *********************** CONVENIENCE ***********************************/
+
+       /**
+        * Convenience method to add a single read to a consensus sequence
+        * by creating a {@link SingleReadAlignment}.
+        * @param singleRead the {@link SingleRead} to add
+        * @return the created SingleReadAlignment
+        */
+       public SingleReadAlignment addSingleRead(SingleRead singleRead) {
+               SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead);
+               return alignment;
+       }
+
        public void removeSingleRead(SingleRead singleRead) {
-               this.singleReads.remove(singleRead);
+               Set<SingleReadAlignment> toRemove = new HashSet<SingleReadAlignment>();
+               for (SingleReadAlignment align : this.singleReadAlignments){
+                       if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){
+                               toRemove.add(align);
+                       }
+               }
+               for (SingleReadAlignment align : toRemove){
+                       removeSingleReadAlignment(align);
+               }
+               return;
        }
+
        /**
-        * @see #getSingleReads()
+        * Convenience method that returns all single reads this consensus sequence
+        * is based on via {@link SingleReadAlignment}s.
+        * @return set of related single reads
         */
-       //TODO private as long it is unclear how bidirectionality is handled
-       private void setSingleReads(Set<SingleRead> singleReads) {
-               this.singleReads = singleReads;
+       @XmlTransient
+       @Transient
+       public Set<SingleRead> getSingleReads(){
+               Set<SingleRead> singleReads = new HashSet<SingleRead>();
+               for (SingleReadAlignment align : this.singleReadAlignments){
+                       if (align.getSingleRead() != null){  // == null should not happen
+                               singleReads.add(align.getSingleRead());
+                       }
+               }
+               return singleReads;
        }
 
 
@@ -435,7 +502,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public void setSequenceString(String sequence) {
                consensusSequence.setString(sequence);
        }
-       
+
        /**
         * Convenience method which computes the set of all related pherograms
         * @return the set of pherograms.
@@ -443,69 +510,75 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        @Transient
        public Set<Media> getPherograms(){
                Set<Media> result = new HashSet<Media>();
-               for (SingleRead singleSeq : singleReads){
-                       if (singleSeq.getPherogram() != null){
-                               result.add(singleSeq.getPherogram());
+               for (SingleReadAlignment singleReadAlign : singleReadAlignments){
+                       if (singleReadAlign.getSingleRead() != null &&  singleReadAlign.getSingleRead().getPherogram() != null){
+                               result.add(singleReadAlign.getSingleRead().getPherogram());
                        }
                }
                return result;
        }
-       
+
 
        //***** Registrations ************/
        /**
         * Returns the computed genBank uri.
-        * @return
+        * @return the URI composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}, or <code>null</code> if the accession number is blank
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getGenBankUri() {
-               return createExternalUri(GENBANK_BASE_URI);
+       public URI getGenBankUri() throws URISyntaxException {
+               return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
        }
 
        /**
         * Returns the computed EMBL uri.
-        * @return
+        * @return the URI composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}, or <code>null</code> if the accession number is blank
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getEmblUri() {
-               return createExternalUri(EMBL_BASE_URI);
+       public URI getEmblUri() throws URISyntaxException {
+               return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
        }
 
        /**
         * Returns the computed DDBJ uri.
-        * @return
+        * @return the URI composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}, or <code>null</code> if the accession number is blank
+        * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
         */
        @Transient
-       public URI getDdbjUri() {
-               return createExternalUri(DDBJ_BASE_URI);
+       public URI getDdbjUri() throws URISyntaxException {
+               return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
        }
-       
+
        /**
         * Returns the URI for the BOLD entry.
+        * @return the URI composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}, or <code>null</code> if the BOLD process id is blank
+        * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
         * @see #getBoldProcessId()
         */
        @Transient
-       public URI getBoldUri() {
-               return createExternalUri(BOLD_BASE_URI);
+       public URI getBoldUri() throws URISyntaxException {
+               return createExternalUri(BOLD_BASE_URI, boldProcessId);
        }
 
-       
-       private URI createExternalUri(String baseUri){
-               if (StringUtils.isNotBlank(geneticAccessionNumber)){
-                       return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
+       private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
+               if (StringUtils.isNotBlank(id)){
+                       return new URI(String.format(baseUri, id.trim()));
                }else{
                        return null;
                }
        }
-       
-       
+
+
+
+
        //*********************** CLONE ********************************************************/
-       /** 
+       /**
         * Clones <i>this</i> sequence. This is a shortcut that enables to create
         * a new instance that differs only slightly from <i>this</i> sequencing by
         * modifying only some of the attributes.<BR><BR>
-        * 
-        *  
+        *
+        *
         * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
         * @see java.lang.Object#clone()
         */
@@ -513,26 +586,27 @@ public class Sequence extends AnnotatableEntity implements Cloneable{
        public Object clone()  {
                try{
                Sequence result = (Sequence)super.clone();
-               
+
                //sequences
                result.consensusSequence = (SequenceString)this.consensusSequence.clone();
                result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
-               
-               
+
+
                //single sequences
-               result.singleReads = new HashSet<SingleRead>();
-               for (SingleRead seq: this.singleReads){
-                       result.singleReads.add((SingleRead) seq);
+               result.singleReadAlignments = new HashSet<SingleReadAlignment>();
+               for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
+                       SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
+                       result.singleReadAlignments.add(newAlignment);
                }
-               
+
                //citations  //TODO do we really want to copy these ??
                result.citations = new HashSet<Reference>();
-               for (Reference ref: this.citations){
-                       result.citations.add((Reference) ref);
+               for (Reference<?> ref: this.citations){
+                       result.citations.add(ref);
                }
-               
-               
-               
+
+
+
                return result;
                }catch (CloneNotSupportedException e) {
                        logger.warn("Object does not implement cloneable");