X-Git-Url: https://dev.e-taxonomy.eu/gitweb/cdmlib.git/blobdiff_plain/18f0b4b7d519f02dc779df5675f15ce9ea732db0..395ccc976457450f5865a6fd2f7d2061819730d2:/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java diff --git a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java index 4730edf233..87eef90d3b 100644 --- a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java +++ b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/molecular/Sequence.java @@ -1,8 +1,8 @@ /** * Copyright (C) 2007 EDIT -* European Distributed Institute of Taxonomy +* European Distributed Institute of Taxonomy * http://www.e-taxonomy.eu -* +* * The contents of this file are subject to the Mozilla Public License Version 1.1 * See LICENSE.TXT at the top of this package for the full license terms. */ @@ -10,6 +10,7 @@ package eu.etaxonomy.cdm.model.molecular; import java.net.URI; +import java.net.URISyntaxException; import java.util.HashSet; import java.util.Set; @@ -17,6 +18,7 @@ import javax.persistence.Entity; import javax.persistence.FetchType; import javax.persistence.ManyToMany; import javax.persistence.ManyToOne; +import javax.persistence.OneToMany; import javax.persistence.Transient; import javax.validation.constraints.Size; import javax.xml.bind.annotation.XmlAccessType; @@ -27,6 +29,7 @@ import javax.xml.bind.annotation.XmlElementWrapper; import javax.xml.bind.annotation.XmlIDREF; import javax.xml.bind.annotation.XmlRootElement; import javax.xml.bind.annotation.XmlSchemaType; +import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlType; import org.apache.log4j.Logger; @@ -44,16 +47,21 @@ import eu.etaxonomy.cdm.model.media.Media; import eu.etaxonomy.cdm.model.reference.Reference; /** - * Alignment of multiple single sequences to a consensus sequence, - * may also include the extracted barcode sequence. - * - * This class holds information about both the combining process of + * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence. + * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample}, + * while + * + *
This class holds information about both the combining process of * {@link SingleRead single sequences} to one consensus sequence - * (singleReads, contigFile) as well as sequence related information. - * The later includes the sequence string itself, important genetic information - * (marker, haplotype) as well as registration information (genetic accession number) - * citations and barcoding information. - * + * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} ) + * as well as sequence related information. + * The later includes the {@link #getConsensusSequence() sequence string} itself, + * important genetic information about the DNA that has been sequenced + * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as + * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ), + * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id}, + * {@link #getBarcodeSequencePart() barcode sequence}, ...). + * * @author m.doering * @created 08-Nov-2007 13:06:51 * @author a.mueller @@ -70,7 +78,7 @@ import eu.etaxonomy.cdm.model.reference.Reference; "boldProcessId", "haplotype", "contigFile", - "singleReads", + "singleReadAlignments", "citations" }) @XmlRootElement(name = "Sequencing") @@ -81,43 +89,36 @@ import eu.etaxonomy.cdm.model.reference.Reference; public class Sequence extends AnnotatableEntity implements Cloneable{ private static final long serialVersionUID = 8298983152731241775L; private static final Logger logger = Logger.getLogger(Sequence.class); - + + //TODO move to cdmlib-ext? private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s"; private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s"; private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html"; private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s"; - + @XmlElement( name = "DnaSample") @XmlIDREF @XmlSchemaType(name = "IDREF") @ManyToOne(fetch = FetchType.LAZY) @IndexedEmbedded private DnaSample dnaSample; - - + + /** @see #getContigFile() */ @XmlElement(name = "ContigFile") @XmlIDREF @XmlSchemaType(name = "IDREF") @ManyToOne(fetch = FetchType.LAZY) + @Cascade({CascadeType.SAVE_UPDATE}) private Media contigFile; - + /** @see #getConsensusSequence() */ @XmlElement(name = "ConsensusSequence") private SequenceString consensusSequence = SequenceString.NewInstance(); - -// /**{@link #getSequence()}*/ -// @XmlElement(name = "Sequence") -// @Lob -// private String sequence; -// -// @XmlElement(name = "Length") -// private Integer length; - - + @XmlAttribute(name = "isBarcode") private Boolean isBarcode = null; - + /** @see #getBarcodeSequence()*/ @XmlElement(name = "BarcodeSequencePart") private SequenceString barcodeSequencePart = SequenceString.NewInstance(); @@ -126,20 +127,20 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ @XmlElement(name = "GeneticAccessionNumber") @Size(max=20) private String geneticAccessionNumber; - + /** @see #getBoldProcessId() */ @XmlElement(name = "BoldProcessId") @Size(max=20) private String boldProcessId; - - @XmlElementWrapper(name = "SingleReads") - @XmlElement(name = "SingleRead") + + @XmlElementWrapper(name = "SingleReadAlignments") + @XmlElement(name = "SingleReadAlignment") @XmlIDREF @XmlSchemaType(name = "IDREF") - @ManyToMany(fetch = FetchType.LAZY) + @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY) @Cascade({CascadeType.SAVE_UPDATE}) - private Set singleReads = new HashSet(); - + private Set singleReadAlignments = new HashSet(); + /** @see #getDnaMarker() */ @XmlElement(name = "DnaMarker") @XmlIDREF @@ -148,12 +149,12 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ //no cascade as it is a defined term private DefinedTerm dnaMarker; - + /** @see #getHaplotype() */ @XmlElement(name = "Haplotype") @Size(max=100) private String haplotype; - + /** @see #getCitations() */ @XmlElementWrapper(name = "Citations") @XmlElement(name = "Citation") @@ -162,47 +163,70 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ @ManyToMany(fetch = FetchType.LAZY) @Cascade({CascadeType.SAVE_UPDATE}) private Set citations = new HashSet(); - + // //should be calculated in case sequence is set // @XmlElement (name = "DateSequenced", type= String.class) // @XmlJavaTypeAdapter(DateTimeAdapter.class) // @Type(type="dateTimeUserType") // @Basic(fetch = FetchType.LAZY) // private DateTime dateSequenced; - - + + //*********************** FACTORY ****************************************************/ - + public static Sequence NewInstance(String consensusSequence){ Sequence result = new Sequence(); result.setSequenceString(consensusSequence); return result; } - + + + public static Sequence NewInstance(String consensusSequence, Integer length){ + Sequence result = NewInstance(consensusSequence); + result.getConsensusSequence().setLength(length); + return result; + } + + public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){ + Sequence result = NewInstance(consensusSequence); + result.getConsensusSequence().setLength(length); + dnaSample.addSequence(result); + + return result; + } //*********************** CONSTRUCTOR ****************************************************/ - + protected Sequence() {} //*********************** GETTER / SETTER ****************************************************/ - + /** - * The {@link DnaSample dna sample} this sequencing belongs too. + * The {@link DnaSample dna sample} this sequencing belongs too. */ public DnaSample getDnaSample() { return dnaSample; } - //TODO bidirectionality?? /** + * To be called only from {@link DnaSample#addSequence(Sequence)} * @see #getDnaSample() */ - private void setDnaSample(DnaSample dnaSample) { + //TODO implement full bidirectionality + protected void setDnaSample(DnaSample dnaSample) { this.dnaSample = dnaSample; + if (dnaSample != null && !dnaSample.getSequences().contains(this)){ + throw new RuntimeException("Don't use DNA setter"); + } } /** - * The consensus sequence achieved by this sequencing. + * The resulting consensus sequence represened by this {@link Sequence sequence} . + * The consensus is usually computed from the {@link SingleRead single reads}. + * The result of which is stored in a file called {@link #getContigFile() contig file} + * + * #see {@link #getContigFile()} + * #see {@link #getSingleReads()} */ public SequenceString getConsensusSequence() { return consensusSequence; @@ -218,18 +242,18 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ } this.consensusSequence = sequenceString; } - + /** - * The isBarcode flag should be set to true if this (consensus) sequence is or includes - * a barcode sequence. If the barcode sequence is only a part of the consensus sequence - * this part is to be stored as {@link #getBarcodeSequencePart() barcode sequence part}. + * The isBarcode flag should be set to true if this (consensus) sequence is or includes + * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence + * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}. * A isBarcode value of null indicates that we do have no knowledge - * wether the sequence is a barcoding sequence or not. - * + * whether the sequence is a barcoding sequence or not. + * * @see #getBarcodeSequencePart() * @see #getSequenceString() * @returns the isBarcode flag value (tri-state) - * + * */ public Boolean getIsBarcode() { return isBarcode; @@ -244,10 +268,10 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ } /** - * If the barcode sequence string does not include 100% of the (consensus) sequence + * If the barcode sequence string does not include 100% of the (consensus) sequence * the part used as barcode is provided here. However, the barcode part - * should be kept if consensus sequence string and barcode sequence string are equal. - * + * should be kept empty if consensus sequence string and barcode sequence string are equal. + * * @see #getIsBarcode() */ public SequenceString getBarcodeSequencePart() { @@ -255,17 +279,21 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ } /** - * @see #getBarcodeSequence() + * @see #getBarcodeSequencePart() */ - public void setBarcodeSequence(SequenceString barcodeSequencePart) { + public void setBarcodeSequencePart(SequenceString barcodeSequencePart) { if (barcodeSequencePart == null){ barcodeSequencePart = SequenceString.NewInstance(); } this.barcodeSequencePart = barcodeSequencePart; } - + /** - * Sets the {@link TermType#DnaMarker marker} examined and described by this sequencing. + * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing. + * The marker should usually be similar to the one used in the according {@link Amplification + * amplification process}. However, it may slightly differ, or, if multiple amplifications where + * used to build this consensus sequence it may be the super set of the markers used in amplification. + * * @return */ public DefinedTerm getDnaMarker(){ @@ -281,7 +309,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ } /** - * The accession number used in GenBank, EMBL and DDBJ. + * The accession number used in GenBank, EMBL and DDBJ. * @return */ public String getGeneticAccessionNumber() { @@ -295,7 +323,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ public void setGeneticAccessionNumber(String geneticAccessionNumber) { this.geneticAccessionNumber = geneticAccessionNumber; } - + /** * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/). @@ -327,6 +355,9 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ /** * The contigFile containing all data and data processing for this sequencing. + * + * @see #getConsensusSequence() + * @see #getSingleReads() */ public Media getContigFile() { return contigFile; @@ -338,14 +369,14 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ public void setContigFile(Media contigFile) { this.contigFile = contigFile; } - - + + /** * Citations are the set of references in which this sequence was published. * Unlike taxonomic names the first publication of a sequence * is not so important (maybe because it is required by publishers - * that they are all registered at Genbank) therefore we do not have something like an - * "original reference" attribute.
+ * that they are all registered at Genbank) therefore we do not have something like an + * "original reference" attribute.
* Links to these references are to be stored within the reference itself. * @return the set of references in which this sequence was published. */ @@ -372,29 +403,82 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ } /** - * The single reads that where used to create this consensus sequence. + * The {@link SingleRead single reads} that were used to build this consensus sequence. + * + * @see #getConsensusSequence() + * @see #getContigFile() */ - public Set getSingleReads() { - return singleReads; + public Set getSingleReadAlignments() { + return singleReadAlignments; } /** * @see #getSingleReads() */ - public void addSingleRead(SingleRead singleRead) { - this.singleReads.add(singleRead); + public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) { + this.singleReadAlignments.add(singleReadAlignment); + if (! this.equals(singleReadAlignment.getConsensusSequence())){ + singleReadAlignment.setConsensusAlignment(this); + }; } /** * @see #getSingleReads() */ + public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) { + this.singleReadAlignments.remove(singleReadAlignment); + if (this.equals(singleReadAlignment.getConsensusSequence())){ + singleReadAlignment.setConsensusAlignment(null); + } + } +// /** +// * @see #getSingleReads() +// */ +// //TODO private as long it is unclear how bidirectionality is handled +// @SuppressWarnings("unused") +// private void setSingleReadAlignments(Set singleReadAlignments) { +// this.singleReadAlignments = singleReadAlignments; +// } + +// *********************** CONVENIENCE ***********************************/ + + /** + * Convenience method to add a single read to a consensus sequence + * by creating a {@link SingleReadAlignment}. + * @param singleRead the {@link SingleRead} to add + * @return the created SingleReadAlignment + */ + public SingleReadAlignment addSingleRead(SingleRead singleRead) { + SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead); + return alignment; + } + public void removeSingleRead(SingleRead singleRead) { - this.singleReads.remove(singleRead); + Set toRemove = new HashSet(); + for (SingleReadAlignment align : this.singleReadAlignments){ + if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){ + toRemove.add(align); + } + } + for (SingleReadAlignment align : toRemove){ + removeSingleReadAlignment(align); + } + return; } + /** - * @see #getSingleReads() + * Convenience method that returns all single reads this consensus sequence + * is based on via {@link SingleReadAlignment}s. + * @return set of related single reads */ - //TODO private as long it is unclear how bidirectionality is handled - private void setSingleReads(Set singleReads) { - this.singleReads = singleReads; + @XmlTransient + @Transient + public Set getSingleReads(){ + Set singleReads = new HashSet(); + for (SingleReadAlignment align : this.singleReadAlignments){ + if (align.getSingleRead() != null){ // == null should not happen + singleReads.add(align.getSingleRead()); + } + } + return singleReads; } @@ -417,7 +501,7 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ public void setSequenceString(String sequence) { consensusSequence.setString(sequence); } - + /** * Convenience method which computes the set of all related pherograms * @return the set of pherograms. @@ -425,69 +509,75 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ @Transient public Set getPherograms(){ Set result = new HashSet(); - for (SingleRead singleSeq : singleReads){ - if (singleSeq.getPherogram() != null){ - result.add(singleSeq.getPherogram()); + for (SingleReadAlignment singleReadAlign : singleReadAlignments){ + if (singleReadAlign.getSingleRead() != null && singleReadAlign.getSingleRead().getPherogram() != null){ + result.add(singleReadAlign.getSingleRead().getPherogram()); } } return result; } - + //***** Registrations ************/ /** * Returns the computed genBank uri. - * @return + * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber} + * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber} */ @Transient - public URI getGenBankUri() { - return createExternalUri(GENBANK_BASE_URI); + public URI getGenBankUri() throws URISyntaxException { + return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber); } /** * Returns the computed EMBL uri. - * @return + * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber} + * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber} */ @Transient - public URI getEmblUri() { - return createExternalUri(EMBL_BASE_URI); + public URI getEmblUri() throws URISyntaxException { + return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber); } /** * Returns the computed DDBJ uri. - * @return + * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber} + * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber} */ @Transient - public URI getDdbjUri() { - return createExternalUri(DDBJ_BASE_URI); + public URI getDdbjUri() throws URISyntaxException { + return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber); } - + /** * Returns the URI for the BOLD entry. + * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId} + * @throws URISyntaxException when URI could not be created with {@link #boldProcessId} * @see #getBoldProcessId() */ @Transient - public URI getBoldUri() { - return createExternalUri(BOLD_BASE_URI); + public URI getBoldUri() throws URISyntaxException { + return createExternalUri(BOLD_BASE_URI, boldProcessId); } - - private URI createExternalUri(String baseUri){ - if (StringUtils.isNotBlank(geneticAccessionNumber)){ - return URI.create(String.format(baseUri, geneticAccessionNumber.trim())); + private URI createExternalUri(String baseUri, String id) throws URISyntaxException{ + if (StringUtils.isNotBlank(id)){ + return new URI(String.format(baseUri, id.trim())); }else{ return null; } } - - + + + + //*********************** CLONE ********************************************************/ - /** + /** * Clones this sequence. This is a shortcut that enables to create * a new instance that differs only slightly from this sequencing by * modifying only some of the attributes.

- * - * + * + * * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone() * @see java.lang.Object#clone() */ @@ -495,26 +585,27 @@ public class Sequence extends AnnotatableEntity implements Cloneable{ public Object clone() { try{ Sequence result = (Sequence)super.clone(); - + //sequences result.consensusSequence = (SequenceString)this.consensusSequence.clone(); result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone(); - - + + //single sequences - result.singleReads = new HashSet(); - for (SingleRead seq: this.singleReads){ - result.singleReads.add((SingleRead) seq); + result.singleReadAlignments = new HashSet(); + for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){ + SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone(); + result.singleReadAlignments.add(newAlignment); } - + //citations //TODO do we really want to copy these ?? result.citations = new HashSet(); - for (Reference ref: this.citations){ - result.citations.add((Reference) ref); + for (Reference ref: this.citations){ + result.citations.add(ref); } - - - + + + return result; }catch (CloneNotSupportedException e) { logger.warn("Object does not implement cloneable");