/**
* Copyright (C) 2007 EDIT
-* European Distributed Institute of Taxonomy
+* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
-*
+*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
import java.net.URI;
+import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Set;
+import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.FetchType;
import javax.persistence.ManyToMany;
import javax.persistence.ManyToOne;
+import javax.persistence.OneToMany;
import javax.persistence.Transient;
-import javax.validation.constraints.Size;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlIDREF;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlSchemaType;
+import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlType;
import org.apache.log4j.Logger;
/**
* Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
* This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample},
- * while
- *
- * <BR>This class holds information about both the combining process of
+ * while
+ *
+ * <BR>This class holds information about both the combining process of
* {@link SingleRead single sequences} to one consensus sequence
- * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
+ * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
* as well as sequence related information.
- * The later includes the {@link #getConsensusSequence() sequence string} itself,
- * important genetic information about the DNA that has been sequenced
- * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as
+ * The latter includes the {@link #getConsensusSequence() sequence string} itself,
+ * important genetic information about the DNA that has been sequenced
+ * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as
* registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
- * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
+ * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
* {@link #getBarcodeSequencePart() barcode sequence}, ...).
- *
+ *
* @author m.doering
* @created 08-Nov-2007 13:06:51
* @author a.mueller
"boldProcessId",
"haplotype",
"contigFile",
- "singleReads",
+ "singleReadAlignments",
"citations"
})
@XmlRootElement(name = "Sequencing")
public class Sequence extends AnnotatableEntity implements Cloneable{
private static final long serialVersionUID = 8298983152731241775L;
private static final Logger logger = Logger.getLogger(Sequence.class);
-
+
//TODO move to cdmlib-ext?
private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
-
+
@XmlElement( name = "DnaSample")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
@ManyToOne(fetch = FetchType.LAZY)
@IndexedEmbedded
private DnaSample dnaSample;
-
-
+
+
/** @see #getContigFile() */
@XmlElement(name = "ContigFile")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
@ManyToOne(fetch = FetchType.LAZY)
+ @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
private Media contigFile;
-
+
/** @see #getConsensusSequence() */
@XmlElement(name = "ConsensusSequence")
private SequenceString consensusSequence = SequenceString.NewInstance();
-
+
@XmlAttribute(name = "isBarcode")
private Boolean isBarcode = null;
-
+
/** @see #getBarcodeSequence()*/
@XmlElement(name = "BarcodeSequencePart")
private SequenceString barcodeSequencePart = SequenceString.NewInstance();
/** @see #getGeneticAccessionNumber()*/
@XmlElement(name = "GeneticAccessionNumber")
- @Size(max=20)
+ @Column(length=20)
private String geneticAccessionNumber;
-
+
/** @see #getBoldProcessId() */
@XmlElement(name = "BoldProcessId")
- @Size(max=20)
+ @Column(length=20)
private String boldProcessId;
-
- @XmlElementWrapper(name = "SingleReads")
- @XmlElement(name = "SingleRead")
+
+ @XmlElementWrapper(name = "SingleReadAlignments")
+ @XmlElement(name = "SingleReadAlignment")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
- @ManyToMany(fetch = FetchType.LAZY)
- @Cascade({CascadeType.SAVE_UPDATE})
- private Set<SingleRead> singleReads = new HashSet<SingleRead>();
-
+ @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY, orphanRemoval=true)
+ @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
+ private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
+
/** @see #getDnaMarker() */
@XmlElement(name = "DnaMarker")
@XmlIDREF
//no cascade as it is a defined term
private DefinedTerm dnaMarker;
-
+
/** @see #getHaplotype() */
@XmlElement(name = "Haplotype")
- @Size(max=100)
+ @Column(length=100)
private String haplotype;
-
+
/** @see #getCitations() */
@XmlElementWrapper(name = "Citations")
@XmlElement(name = "Citation")
@XmlIDREF
@XmlSchemaType(name = "IDREF")
@ManyToMany(fetch = FetchType.LAZY)
- @Cascade({CascadeType.SAVE_UPDATE})
+ @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
private Set<Reference> citations = new HashSet<Reference>();
-
+
// //should be calculated in case sequence is set
// @XmlElement (name = "DateSequenced", type= String.class)
// @XmlJavaTypeAdapter(DateTimeAdapter.class)
// @Type(type="dateTimeUserType")
// @Basic(fetch = FetchType.LAZY)
// private DateTime dateSequenced;
-
-
+
+
//*********************** FACTORY ****************************************************/
-
+
public static Sequence NewInstance(String consensusSequence){
Sequence result = new Sequence();
result.setSequenceString(consensusSequence);
return result;
}
-
-
+
+
public static Sequence NewInstance(String consensusSequence, Integer length){
Sequence result = NewInstance(consensusSequence);
result.getConsensusSequence().setLength(length);
return result;
}
+
+ /**
+ * Creates a new sequence with the given consensus sequence string and length
+ * and attaches it to the given DNA sample via {@link DnaSample#addSequence(Sequence)}.
+ *
+ * @param dnaSample the DNA sample the new sequence is added to, must not be <code>null</code>
+ * @param consensusSequence the consensus sequence string
+ * @param length the length to be set on the consensus sequence
+ * @return the newly created sequence
+ * @throws NullPointerException if dnaSample is <code>null</code>
+ */
+ public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
+     // fail fast and with a message, before any object is created
+     if (dnaSample == null){
+         throw new NullPointerException("dnaSample must not be null");
+     }
+     // reuse the (String, Integer) factory instead of repeating its steps
+     Sequence result = NewInstance(consensusSequence, length);
+     dnaSample.addSequence(result);
+     return result;
+ }
//*********************** CONSTRUCTOR ****************************************************/
-
+
protected Sequence() {}
//*********************** GETTER / SETTER ****************************************************/
-
+
/**
- * The {@link DnaSample dna sample} this sequencing belongs too.
+ * The {@link DnaSample dna sample} this sequencing belongs to.
*/
public DnaSample getDnaSample() {
return dnaSample;
}
- //TODO bidirectionality??
/**
+ * To be called only from {@link DnaSample#addSequence(Sequence)}
* @see #getDnaSample()
*/
- private void setDnaSample(DnaSample dnaSample) {
+ //TODO implement full bidirectionality
+ protected void setDnaSample(DnaSample dnaSample) {
+     // Validate before assigning: a rejected call must not leave this sequence
+     // pointing to a sample that does not list it among its sequences.
+     if (dnaSample != null && !dnaSample.getSequences().contains(this)){
+         throw new RuntimeException("Don't use DNA setter");
+     }
this.dnaSample = dnaSample;
}
/**
- * The resulting consensus sequence represened by this {@link Sequence sequence} .
+ * The resulting consensus sequence represented by this {@link Sequence sequence} .
* The consensus is usually computed from the {@link SingleRead single reads}.
* The result of which is stored in a file called {@link #getContigFile() contig file}
- *
+ *
* #see {@link #getContigFile()}
* #see {@link #getSingleReads()}
*/
}
this.consensusSequence = sequenceString;
}
-
+
/**
- * The isBarcode flag should be set to true if this (consensus) sequence is or includes
+ * The isBarcode flag should be set to true if this (consensus) sequence is or includes
* a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
* this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
* A isBarcode value of <code>null</code> indicates that we do have no knowledge
* whether the sequence is a barcoding sequence or not.
- *
+ *
* @see #getBarcodeSequencePart()
* @see #getSequenceString()
- * @returns the isBarcode flag value (tri-state)
+ * @return the isBarcode flag value (tri-state)
- *
+ *
*/
public Boolean getIsBarcode() {
return isBarcode;
}
/**
- * If the barcode sequence string does not include 100% of the (consensus) sequence
+ * If the barcode sequence string does not include 100% of the (consensus) sequence
* the part used as barcode is provided here. However, the barcode part
* should be kept empty if consensus sequence string and barcode sequence string are equal.
- *
+ *
* @see #getIsBarcode()
*/
public SequenceString getBarcodeSequencePart() {
}
/**
- * @see #getBarcodeSequence()
+ * @see #getBarcodeSequencePart()
*/
- public void setBarcodeSequence(SequenceString barcodeSequencePart) {
+ public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
if (barcodeSequencePart == null){
barcodeSequencePart = SequenceString.NewInstance();
}
this.barcodeSequencePart = barcodeSequencePart;
}
-
+
/**
* Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
* The marker should usually be similar to the one used in the according {@link Amplification
* amplification process}. However, it may slightly differ, or, if multiple amplifications where
* used to build this consensus sequence it may be the super set of the markers used in amplification.
- *
+ *
* @return
*/
public DefinedTerm getDnaMarker(){
public void setGeneticAccessionNumber(String geneticAccessionNumber) {
this.geneticAccessionNumber = geneticAccessionNumber;
}
-
+
/**
* The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
/**
* The contigFile containing all data and data processing for this sequencing.
- *
+ *
* @see #getConsensusSequence()
* @see #getSingleReads()
*/
public void setContigFile(Media contigFile) {
this.contigFile = contigFile;
}
-
-
+
+
/**
* Citations are the set of references in which this sequence was published.
* Unlike taxonomic names the first publication of a sequence
* is not so important (maybe because it is required by publishers
- * that they are all registered at Genbank) therefore we do not have something like an
- * "original reference" attribute.<BR>
+ * that they are all registered at Genbank) therefore we do not have something like an
+ * "original reference" attribute.<BR>
* Links to these references are to be stored within the reference itself.
* @return the set of references in which this sequence was published.
*/
/**
* The {@link SingleRead single reads} that were used to build this consensus sequence.
- *
+ *
* @see #getConsensusSequence()
* @see #getContigFile()
*/
- public Set<SingleRead> getSingleReads() {
- return singleReads;
+ public Set<SingleReadAlignment> getSingleReadAlignments() {
+ return singleReadAlignments;
}
/**
* @see #getSingleReads()
*/
- public void addSingleRead(SingleRead singleRead) {
- this.singleReads.add(singleRead);
+ public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+     // Reject null before touching the set: otherwise null could be stored and the
+     // dereference below would only fail afterwards, leaving a null element behind.
+     if (singleReadAlignment == null){
+         throw new NullPointerException("singleReadAlignment must not be null");
+     }
+     // add to the set first, then bring the back reference in sync
+     this.singleReadAlignments.add(singleReadAlignment);
+     if (! this.equals(singleReadAlignment.getConsensusSequence())){
+         singleReadAlignment.setConsensusAlignment(this);
+     }
}
/**
* @see #getSingleReads()
*/
+ public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
+     this.singleReadAlignments.remove(singleReadAlignment);
+     boolean pointsToThis = this.equals(singleReadAlignment.getConsensusSequence());
+     if (!pointsToThis){
+         // the alignment does not reference this sequence, leave its links untouched
+         return;
+     }
+     // clear both references held by the alignment
+     singleReadAlignment.setConsensusAlignment(null);
+     singleReadAlignment.setSingleRead(null);
+ }
+// /**
+// * @see #getSingleReads()
+// */
+// //TODO private as long it is unclear how bidirectionality is handled
+// @SuppressWarnings("unused")
+// private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
+// this.singleReadAlignments = singleReadAlignments;
+// }
+
+// *********************** CONVENIENCE ***********************************/
+
+ /**
+ * Convenience method that links the given single read to this consensus sequence
+ * through a newly created {@link SingleReadAlignment}.
+ * @param singleRead the {@link SingleRead} to link
+ * @return the {@link SingleReadAlignment} created for this sequence and the given single read
+ */
+ public SingleReadAlignment addSingleRead(SingleRead singleRead) {
+     return SingleReadAlignment.NewInstance(this, singleRead);
+ }
+
+ /**
+ * Convenience method that removes every {@link SingleReadAlignment} of this
+ * consensus sequence whose single read equals the given one.
+ * @param singleRead the {@link SingleRead} to remove
+ */
public void removeSingleRead(SingleRead singleRead) {
- this.singleReads.remove(singleRead);
+     // first pass: only collect the matches, because removeSingleReadAlignment
+     // modifies the set that is being iterated here
+     Set<SingleReadAlignment> matches = new HashSet<SingleReadAlignment>();
+     for (SingleReadAlignment candidate : this.singleReadAlignments){
+         SingleRead linkedRead = candidate.getSingleRead();
+         if (linkedRead == null){
+             continue;
+         }
+         if (linkedRead.equals(singleRead)){
+             matches.add(candidate);
+         }
+     }
+     // second pass: detach every collected alignment
+     for (SingleReadAlignment match : matches){
+         removeSingleReadAlignment(match);
+     }
}
+
/**
- * @see #getSingleReads()
+ * Convenience method that returns all single reads this consensus sequence
+ * is based on via {@link SingleReadAlignment}s.
+ * @return set of related single reads
*/
- //TODO private as long it is unclear how bidirectionality is handled
- private void setSingleReads(Set<SingleRead> singleReads) {
- this.singleReads = singleReads;
+ @XmlTransient
+ @Transient
+ public Set<SingleRead> getSingleReads(){
+     Set<SingleRead> reads = new HashSet<SingleRead>();
+     for (SingleReadAlignment alignment : this.singleReadAlignments){
+         SingleRead read = alignment.getSingleRead();
+         if (read == null){
+             // not expected to happen, skipped defensively
+             continue;
+         }
+         reads.add(read);
+     }
+     return reads;
}
public void setSequenceString(String sequence) {
consensusSequence.setString(sequence);
}
-
+
/**
* Convenience method which computes the set of all related pherograms
* @return the set of pherograms.
@Transient
public Set<Media> getPherograms(){
Set<Media> result = new HashSet<Media>();
- for (SingleRead singleSeq : singleReads){
- if (singleSeq.getPherogram() != null){
- result.add(singleSeq.getPherogram());
+ for (SingleReadAlignment alignment : singleReadAlignments){
+     SingleRead read = alignment.getSingleRead();
+     // an alignment without a single read cannot contribute a pherogram
+     Media pherogram = (read == null) ? null : read.getPherogram();
+     if (pherogram != null){
+         result.add(pherogram);
}
}
return result;
}
-
+
//***** Registrations ************/
/**
* Returns the computed genBank uri.
- * @return
+ * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
+ * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
*/
@Transient
- public URI getGenBankUri() {
- return createExternalUri(GENBANK_BASE_URI);
+ public URI getGenBankUri() throws URISyntaxException {
+ return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
}
/**
* Returns the computed EMBL uri.
- * @return
+ * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
+ * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
*/
@Transient
- public URI getEmblUri() {
- return createExternalUri(EMBL_BASE_URI);
+ public URI getEmblUri() throws URISyntaxException {
+ return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
}
/**
* Returns the computed DDBJ uri.
- * @return
+ * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
+ * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
*/
@Transient
- public URI getDdbjUri() {
- return createExternalUri(DDBJ_BASE_URI);
+ public URI getDdbjUri() throws URISyntaxException {
+ return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
}
-
+
/**
* Returns the URI for the BOLD entry.
+ * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
+ * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
* @see #getBoldProcessId()
*/
@Transient
- public URI getBoldUri() {
- return createExternalUri(BOLD_BASE_URI);
+ public URI getBoldUri() throws URISyntaxException {
+ return createExternalUri(BOLD_BASE_URI, boldProcessId);
}
-
- private URI createExternalUri(String baseUri){
- if (StringUtils.isNotBlank(geneticAccessionNumber)){
- return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
+ /**
+ * Builds the URI of an external registry entry by inserting the trimmed id
+ * into the given base uri pattern.
+ * @param baseUri the format pattern containing a <code>%s</code> placeholder for the id
+ * @param id the identifier to insert
+ * @return the resulting uri, or <code>null</code> if the id is blank
+ * @throws URISyntaxException if the formatted string can not be parsed as a URI
+ */
+ private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
+ if (StringUtils.isNotBlank(id)){
+     String location = String.format(baseUri, id.trim());
+     return new URI(location);
}else{
return null;
}
}
-
-
+
+
+
+
//*********************** CLONE ********************************************************/
- /**
+ /**
* Clones <i>this</i> sequence. This is a shortcut that enables to create
* a new instance that differs only slightly from <i>this</i> sequencing by
* modifying only some of the attributes.<BR><BR>
- *
- *
+ *
+ *
* @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
* @see java.lang.Object#clone()
*/
public Object clone() {
try{
Sequence result = (Sequence)super.clone();
-
+
//sequences
result.consensusSequence = (SequenceString)this.consensusSequence.clone();
result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
-
-
+
+
//single sequences
- result.singleReads = new HashSet<SingleRead>();
- for (SingleRead seq: this.singleReads){
- result.singleReads.add((SingleRead) seq);
+ result.singleReadAlignments = new HashSet<SingleReadAlignment>();
+ for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
+ SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
+ result.singleReadAlignments.add(newAlignment);
}
-
+
//citations //TODO do we really want to copy these ??
result.citations = new HashSet<Reference>();
- for (Reference ref: this.citations){
- result.citations.add((Reference) ref);
+ for (Reference<?> ref: this.citations){
+ result.citations.add(ref);
}
-
-
-
+
+
+
return result;
}catch (CloneNotSupportedException e) {
logger.warn("Object does not implement cloneable");