fixing test for SingleReadAlignment and removal of SpecAndObs_Sequence
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.model.molecular;
10
11
12 import java.net.URI;
13 import java.net.URISyntaxException;
14 import java.util.HashSet;
15 import java.util.Set;
16
17 import javax.persistence.Entity;
18 import javax.persistence.FetchType;
19 import javax.persistence.ManyToMany;
20 import javax.persistence.ManyToOne;
21 import javax.persistence.OneToMany;
22 import javax.persistence.Transient;
23 import javax.validation.constraints.Size;
24 import javax.xml.bind.annotation.XmlAccessType;
25 import javax.xml.bind.annotation.XmlAccessorType;
26 import javax.xml.bind.annotation.XmlAttribute;
27 import javax.xml.bind.annotation.XmlElement;
28 import javax.xml.bind.annotation.XmlElementWrapper;
29 import javax.xml.bind.annotation.XmlIDREF;
30 import javax.xml.bind.annotation.XmlRootElement;
31 import javax.xml.bind.annotation.XmlSchemaType;
32 import javax.xml.bind.annotation.XmlTransient;
33 import javax.xml.bind.annotation.XmlType;
34
35 import org.apache.log4j.Logger;
36 import org.codehaus.plexus.util.StringUtils;
37 import org.hibernate.annotations.Cascade;
38 import org.hibernate.annotations.CascadeType;
39 import org.hibernate.envers.Audited;
40 import org.hibernate.search.annotations.IndexedEmbedded;
41 import org.springframework.beans.factory.annotation.Configurable;
42
43 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
44 import eu.etaxonomy.cdm.model.common.DefinedTerm;
45 import eu.etaxonomy.cdm.model.common.TermType;
46 import eu.etaxonomy.cdm.model.media.Media;
47 import eu.etaxonomy.cdm.model.reference.Reference;
48
49 /**
50 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
51 * This sequence is a part of (or the complete) DNA sequence of the related
52 * {@link DnaSample DNA Sample}.
53 *
54 * <BR>This class holds information about both the combining process of
55 * {@link SingleRead single sequences} to one consensus sequence
56 * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
57 * as well as sequence related information.
58 * The latter includes the {@link #getConsensusSequence() sequence string} itself,
59 * important genetic information about the DNA that has been sequenced
60 * ({@link #getDnaMarker() marker}, {@link #getHaplotype() haplotype}) as well as
61 * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
62 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
63 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
64 *
65 * @author m.doering
66 * @created 08-Nov-2007 13:06:51
67 * @author a.mueller
68 * @updated 11-Jul-2013
69 */
70 @XmlAccessorType(XmlAccessType.FIELD)
71 @XmlType(name = "Sequence", propOrder = {
72 "dnaSample",
73 "consensusSequence",
74 "isBarcode",
75 "barcodeSequencePart",
76 "dnaMarker",
77 "geneticAccessionNumber",
78 "boldProcessId",
79 "haplotype",
80 "contigFile",
81 "singleReadAlignments",
82 "citations"
83 })
84 @XmlRootElement(name = "Sequencing")
85 @Entity
86 @Audited
87 @Configurable
88 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
89 public class Sequence extends AnnotatableEntity implements Cloneable{
90 private static final long serialVersionUID = 8298983152731241775L;
91 private static final Logger logger = Logger.getLogger(Sequence.class);
92
93 //TODO move to cdmlib-ext?
94 private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
95 private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
96 private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
97 private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
98
99 @XmlElement( name = "DnaSample")
100 @XmlIDREF
101 @XmlSchemaType(name = "IDREF")
102 @ManyToOne(fetch = FetchType.LAZY)
103 @IndexedEmbedded
104 private DnaSample dnaSample;
105
106
107 /** @see #getContigFile() */
108 @XmlElement(name = "ContigFile")
109 @XmlIDREF
110 @XmlSchemaType(name = "IDREF")
111 @ManyToOne(fetch = FetchType.LAZY)
112 @Cascade({CascadeType.SAVE_UPDATE})
113 private Media contigFile;
114
115 /** @see #getConsensusSequence() */
116 @XmlElement(name = "ConsensusSequence")
117 private SequenceString consensusSequence = SequenceString.NewInstance();
118
119 @XmlAttribute(name = "isBarcode")
120 private Boolean isBarcode = null;
121
122 /** @see #getBarcodeSequencePart() */
123 @XmlElement(name = "BarcodeSequencePart")
124 private SequenceString barcodeSequencePart = SequenceString.NewInstance();
125
126 /** @see #getGeneticAccessionNumber()*/
127 @XmlElement(name = "GeneticAccessionNumber")
128 @Size(max=20)
129 private String geneticAccessionNumber;
130
131 /** @see #getBoldProcessId() */
132 @XmlElement(name = "BoldProcessId")
133 @Size(max=20)
134 private String boldProcessId;
135
136 @XmlElementWrapper(name = "SingleReadAlignments")
137 @XmlElement(name = "SingleReadAlignment")
138 @XmlIDREF
139 @XmlSchemaType(name = "IDREF")
140 @OneToMany(mappedBy="consensusSequence", fetch = FetchType.LAZY)
141 @Cascade({CascadeType.SAVE_UPDATE})
142 private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
143
144 /** @see #getDnaMarker() */
145 @XmlElement(name = "DnaMarker")
146 @XmlIDREF
147 @XmlSchemaType(name = "IDREF")
148 @ManyToOne(fetch = FetchType.LAZY)
149 //no cascade as it is a defined term
150 private DefinedTerm dnaMarker;
151
152
153 /** @see #getHaplotype() */
154 @XmlElement(name = "Haplotype")
155 @Size(max=100)
156 private String haplotype;
157
158 /** @see #getCitations() */
159 @XmlElementWrapper(name = "Citations")
160 @XmlElement(name = "Citation")
161 @XmlIDREF
162 @XmlSchemaType(name = "IDREF")
163 @ManyToMany(fetch = FetchType.LAZY)
164 @Cascade({CascadeType.SAVE_UPDATE})
165 private Set<Reference> citations = new HashSet<Reference>();
166
167 // //should be calculated in case sequence is set
168 // @XmlElement (name = "DateSequenced", type= String.class)
169 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
170 // @Type(type="dateTimeUserType")
171 // @Basic(fetch = FetchType.LAZY)
172 // private DateTime dateSequenced;
173
174
175 //*********************** FACTORY ****************************************************/
176
177 public static Sequence NewInstance(String consensusSequence){
178 Sequence result = new Sequence();
179 result.setSequenceString(consensusSequence);
180 return result;
181 }
182
183
184 public static Sequence NewInstance(String consensusSequence, Integer length){
185 Sequence result = NewInstance(consensusSequence);
186 result.getConsensusSequence().setLength(length);
187 return result;
188 }
189
190 public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
191 Sequence result = NewInstance(consensusSequence);
192 result.getConsensusSequence().setLength(length);
193 dnaSample.addSequence(result);
194
195 return result;
196 }
197 //*********************** CONSTRUCTOR ****************************************************/
198
/**
 * Protected default constructor. Client code creates instances through the
 * static <code>NewInstance</code> factory methods of this class.
 */
protected Sequence() {
    super();
}
200
201 //*********************** GETTER / SETTER ****************************************************/
202
203
204 /**
205 * The {@link DnaSample dna sample} this sequencing belongs too.
206 */
207 public DnaSample getDnaSample() {
208 return dnaSample;
209 }
210
211 /**
212 * To be called only from {@link DnaSample#addSequence(Sequence)}
213 * @see #getDnaSample()
214 */
215 //TODO implement full bidirectionality
216 protected void setDnaSample(DnaSample dnaSample) {
217 this.dnaSample = dnaSample;
218 if (dnaSample != null && !dnaSample.getSequences().contains(this)){
219 throw new RuntimeException("Don't use DNA setter");
220 }
221 }
222
223 /**
224 * The resulting consensus sequence represened by this {@link Sequence sequence} .
225 * The consensus is usually computed from the {@link SingleRead single reads}.
226 * The result of which is stored in a file called {@link #getContigFile() contig file}
227 *
228 * #see {@link #getContigFile()}
229 * #see {@link #getSingleReads()}
230 */
231 public SequenceString getConsensusSequence() {
232 return consensusSequence;
233 }
234
235
236 /**
237 * @see #getConsensusSequence()
238 */
239 public void setConsensusSequence(SequenceString sequenceString) {
240 if (sequenceString == null){
241 sequenceString = SequenceString.NewInstance();
242 }
243 this.consensusSequence = sequenceString;
244 }
245
246 /**
247 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
248 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
249 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
250 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
251 * whether the sequence is a barcoding sequence or not.
252 *
253 * @see #getBarcodeSequencePart()
254 * @see #getSequenceString()
255 * @returns the isBarcode flag value (tri-state)
256 *
257 */
258 public Boolean getIsBarcode() {
259 return isBarcode;
260 }
261
262 /**
263 * @see #getIsBarcode()
264 * @see #getBarcodeSequencePart()
265 */
266 public void setIsBarcode(Boolean isBarcode) {
267 this.isBarcode = isBarcode;
268 }
269
270 /**
271 * If the barcode sequence string does not include 100% of the (consensus) sequence
272 * the part used as barcode is provided here. However, the barcode part
273 * should be kept empty if consensus sequence string and barcode sequence string are equal.
274 *
275 * @see #getIsBarcode()
276 */
277 public SequenceString getBarcodeSequencePart() {
278 return barcodeSequencePart;
279 }
280
281 /**
282 * @see #getBarcodeSequencePart()
283 */
284 public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
285 if (barcodeSequencePart == null){
286 barcodeSequencePart = SequenceString.NewInstance();
287 }
288 this.barcodeSequencePart = barcodeSequencePart;
289 }
290
291 /**
292 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
293 * The marker should usually be similar to the one used in the according {@link Amplification
294 * amplification process}. However, it may slightly differ, or, if multiple amplifications where
295 * used to build this consensus sequence it may be the super set of the markers used in amplification.
296 *
297 * @return
298 */
299 public DefinedTerm getDnaMarker(){
300 return this.dnaMarker;
301 }
302
303 /**
304 * @see #getDnaMarker()
305 * @param marker
306 */
307 public void setDnaMarker(DefinedTerm dnaMarker){
308 this.dnaMarker = dnaMarker;
309 }
310
311 /**
312 * The accession number used in GenBank, EMBL and DDBJ.
313 * @return
314 */
315 public String getGeneticAccessionNumber() {
316 return geneticAccessionNumber;
317 }
318
319 /**
320 * Sets the genetic accession number.
321 * @see #getGeneticAccessionNumber()
322 */
323 public void setGeneticAccessionNumber(String geneticAccessionNumber) {
324 this.geneticAccessionNumber = geneticAccessionNumber;
325 }
326
327
328 /**
329 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
330 */
331 public String getBoldProcessId() {
332 return boldProcessId;
333 }
334
335 public void setBoldProcessId(String boldProcessId) {
336 this.boldProcessId = boldProcessId;
337 }
338
339 /**
340 * Returns the name of the haplotype.
341 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
342 * A certain haplotype may be specific for an individual, a population or a species.
343 * @return
344 */
345 public String getHaplotype() {
346 return haplotype;
347 }
348
349 /**
350 * @see #getHaplotype()
351 */
352 public void setHaplotype(String haplotype) {
353 this.haplotype = haplotype;
354 }
355
356 /**
357 * The contigFile containing all data and data processing for this sequencing.
358 *
359 * @see #getConsensusSequence()
360 * @see #getSingleReads()
361 */
362 public Media getContigFile() {
363 return contigFile;
364 }
365
366 /**
367 * @see #getContigFile()
368 */
369 public void setContigFile(Media contigFile) {
370 this.contigFile = contigFile;
371 }
372
373
374 /**
375 * Citations are the set of references in which this sequence was published.
376 * Unlike taxonomic names the first publication of a sequence
377 * is not so important (maybe because it is required by publishers
378 * that they are all registered at Genbank) therefore we do not have something like an
379 * "original reference" attribute.<BR>
380 * Links to these references are to be stored within the reference itself.
381 * @return the set of references in which this sequence was published.
382 */
383 public Set<Reference> getCitations() {
384 return citations;
385 }
386 /**
387 * @see #getCitations()
388 */
389 protected void setCitations(Set<Reference> citations) {
390 this.citations = citations;
391 }
392 /**
393 * @see #getCitations()
394 */
395 public void addCitation(Reference citation) {
396 this.citations.add(citation);
397 }
398 /**
399 * @see #getCitations()
400 */
401 public void removeCitation(Reference citation) {
402 this.citations.remove(citation);
403 }
404
405 /**
406 * The {@link SingleRead single reads} that were used to build this consensus sequence.
407 *
408 * @see #getConsensusSequence()
409 * @see #getContigFile()
410 */
411 public Set<SingleReadAlignment> getSingleReadAlignments() {
412 return singleReadAlignments;
413 }
414 /**
415 * @see #getSingleReads()
416 */
417 public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
418 this.singleReadAlignments.add(singleReadAlignment);
419 if (! this.equals(singleReadAlignment.getConsensusSequence())){
420 singleReadAlignment.setConsensusSequence(this);
421 };
422 }
423 /**
424 * @see #getSingleReads()
425 */
426 public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
427 this.singleReadAlignments.remove(singleReadAlignment);
428 if (this.equals(singleReadAlignment.getConsensusSequence())){
429 singleReadAlignment.setConsensusSequence(null);
430 }
431 }
432 // /**
433 // * @see #getSingleReads()
434 // */
435 // //TODO private as long it is unclear how bidirectionality is handled
436 // @SuppressWarnings("unused")
437 // private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
438 // this.singleReadAlignments = singleReadAlignments;
439 // }
440
441 // *********************** CONVENIENCE ***********************************/
442
443 /**
444 * Convenience method to add a single read to a consensus sequence
445 * by creating a {@link SingleReadAlignment}.
446 * @param singleRead the {@link SingleRead} to add
447 * @return the created SingleReadAlignment
448 */
449 public SingleReadAlignment addSingleRead(SingleRead singleRead) {
450 SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead);
451 return alignment;
452 }
453
454 public void removeSingleRead(SingleRead singleRead) {
455 Set<SingleReadAlignment> toRemove = new HashSet<SingleReadAlignment>();
456 for (SingleReadAlignment align : this.singleReadAlignments){
457 if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){
458 toRemove.add(align);
459 }
460 }
461 for (SingleReadAlignment align : toRemove){
462 removeSingleReadAlignment(align);
463 }
464 return;
465 }
466
467 /**
468 * Convenience method that returns all single reads this consensus sequence
469 * is based on via {@link SingleReadAlignment}s.
470 * @return set of related single reads
471 */
472 @XmlTransient
473 @Transient
474 public Set<SingleRead> getSingleReads(){
475 Set<SingleRead> singleReads = new HashSet<SingleRead>();
476 for (SingleReadAlignment align : this.singleReadAlignments){
477 if (align.getSingleRead() != null){ // == null should not happen
478 singleReads.add(align.getSingleRead());
479 }
480 }
481 return singleReads;
482 }
483
484
485 //*************************** Transient GETTER /SETTER *****************************/
486
487 /**
488 * Delegate method to get the text representation of the consensus sequence
489 * @see #setSequenceString(String)
490 */
491 @Transient
492 public String getSequenceString() {
493 return consensusSequence.getString();
494 }
495
496 /**
497 * Delegate method to set the text representation of the {@link #getConsensusSequence()
498 * consensus sequence}.
499 */
500 @Transient
501 public void setSequenceString(String sequence) {
502 consensusSequence.setString(sequence);
503 }
504
505 /**
506 * Convenience method which computes the set of all related pherograms
507 * @return the set of pherograms.
508 */
509 @Transient
510 public Set<Media> getPherograms(){
511 Set<Media> result = new HashSet<Media>();
512 for (SingleReadAlignment singleReadAlign : singleReadAlignments){
513 if (singleReadAlign.getSingleRead() != null && singleReadAlign.getSingleRead().getPherogram() != null){
514 result.add(singleReadAlign.getSingleRead().getPherogram());
515 }
516 }
517 return result;
518 }
519
520
521 //***** Registrations ************/
522 /**
523 * Returns the computed genBank uri.
524 * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
525 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
526 */
527 @Transient
528 public URI getGenBankUri() throws URISyntaxException {
529 return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
530 }
531
532 /**
533 * Returns the computed EMBL uri.
534 * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
535 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
536 */
537 @Transient
538 public URI getEmblUri() throws URISyntaxException {
539 return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
540 }
541
542 /**
543 * Returns the computed DDBJ uri.
544 * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
545 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
546 */
547 @Transient
548 public URI getDdbjUri() throws URISyntaxException {
549 return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
550 }
551
552 /**
553 * Returns the URI for the BOLD entry.
554 * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
555 * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
556 * @see #getBoldProcessId()
557 */
558 @Transient
559 public URI getBoldUri() throws URISyntaxException {
560 return createExternalUri(BOLD_BASE_URI, boldProcessId);
561 }
562
563 private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
564 if (StringUtils.isNotBlank(id)){
565 return new URI(String.format(baseUri, id.trim()));
566 }else{
567 return null;
568 }
569 }
570
571
572
573
574 //*********************** CLONE ********************************************************/
575 /**
576 * Clones <i>this</i> sequence. This is a shortcut that enables to create
577 * a new instance that differs only slightly from <i>this</i> sequencing by
578 * modifying only some of the attributes.<BR><BR>
579 *
580 *
581 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
582 * @see java.lang.Object#clone()
583 */
584 @Override
585 public Object clone() {
586 try{
587 Sequence result = (Sequence)super.clone();
588
589 //sequences
590 result.consensusSequence = (SequenceString)this.consensusSequence.clone();
591 result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
592
593
594 //single sequences
595 result.singleReadAlignments = new HashSet<SingleReadAlignment>();
596 for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
597 SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
598 result.singleReadAlignments.add(newAlignment);
599 }
600
601 //citations //TODO do we really want to copy these ??
602 result.citations = new HashSet<Reference>();
603 for (Reference<?> ref: this.citations){
604 result.citations.add(ref);
605 }
606
607
608
609 return result;
610 }catch (CloneNotSupportedException e) {
611 logger.warn("Object does not implement cloneable");
612 e.printStackTrace();
613 return null;
614 }
615 }
616
617
618 }