Project

General

Profile

Download (18.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.model.molecular;
10

    
11

    
12
import java.net.URI;
13
import java.net.URISyntaxException;
14
import java.util.HashSet;
15
import java.util.Set;
16

    
17
import javax.persistence.Column;
18
import javax.persistence.Entity;
19
import javax.persistence.FetchType;
20
import javax.persistence.ManyToMany;
21
import javax.persistence.ManyToOne;
22
import javax.persistence.OneToMany;
23
import javax.persistence.Transient;
24
import javax.xml.bind.annotation.XmlAccessType;
25
import javax.xml.bind.annotation.XmlAccessorType;
26
import javax.xml.bind.annotation.XmlAttribute;
27
import javax.xml.bind.annotation.XmlElement;
28
import javax.xml.bind.annotation.XmlElementWrapper;
29
import javax.xml.bind.annotation.XmlIDREF;
30
import javax.xml.bind.annotation.XmlRootElement;
31
import javax.xml.bind.annotation.XmlSchemaType;
32
import javax.xml.bind.annotation.XmlTransient;
33
import javax.xml.bind.annotation.XmlType;
34

    
35
import org.apache.log4j.Logger;
36
import org.hibernate.annotations.Cascade;
37
import org.hibernate.annotations.CascadeType;
38
import org.hibernate.envers.Audited;
39
import org.hibernate.search.annotations.IndexedEmbedded;
40
import org.springframework.beans.factory.annotation.Configurable;
41

    
42
import eu.etaxonomy.cdm.common.CdmUtils;
43
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
44
import eu.etaxonomy.cdm.model.media.Media;
45
import eu.etaxonomy.cdm.model.reference.Reference;
46
import eu.etaxonomy.cdm.model.term.DefinedTerm;
47
import eu.etaxonomy.cdm.model.term.TermType;
48

    
49
/**
 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
 * This sequence is a part of (or the complete) DNA sequence of the related {@link DnaSample DNA Sample}.
 *
 * <BR>This class holds information about both the combining process of
 * {@link SingleRead single sequences} to one consensus sequence
 * ({@link #getSingleReads() singleReads}, {@link #getContigFile() contigFile})
 * as well as sequence related information.
 * The latter includes the {@link #getConsensusSequence() sequence string} itself,
 * important genetic information about the DNA that has been sequenced
 * ({@link #getDnaMarker() marker}, {@link #getHaplotype() haplotype}) as well as
 * registration information ({@link #getGeneticAccessionNumber() genetic accession number}),
 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
 *
 * @author m.doering
 * @since 08-Nov-2007 13:06:51
 * @author a.mueller
 * @updated 11-Jul-2013
 */
70
@XmlAccessorType(XmlAccessType.FIELD)
71
@XmlType(name = "Sequence", propOrder = {
72
    "dnaSample",
73
	"consensusSequence",
74
	"isBarcode",
75
    "barcodeSequencePart",
76
    "dnaMarker",
77
    "geneticAccessionNumber",
78
    "boldProcessId",
79
    "haplotype",
80
    "contigFile",
81
    "singleReadAlignments",
82
    "citations"
83
})
84
@XmlRootElement(name = "Sequencing")
85
@Entity
86
@Audited
87
@Configurable
88
//@Table(name="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnList = "titleCache") })
89
public class Sequence
90
               extends AnnotatableEntity
91
               implements Cloneable{
92

    
93
	private static final long serialVersionUID = 8298983152731241775L;
94
	private static final Logger logger = Logger.getLogger(Sequence.class);
95

    
96
	//TODO move to cdmlib-ext?
97
	private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
98
	private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
99
	private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
100
	private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
101

    
102
    @XmlElement( name = "DnaSample")
103
    @XmlIDREF
104
    @XmlSchemaType(name = "IDREF")
105
    @ManyToOne(fetch = FetchType.LAZY)
106
    @IndexedEmbedded
107
    private DnaSample dnaSample;
108

    
109

    
110
	/** @see #getContigFile() */
111
	@XmlElement(name = "ContigFile")
112
    @XmlIDREF
113
    @XmlSchemaType(name = "IDREF")
114
    @ManyToOne(fetch = FetchType.LAZY)
115
	@Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
116
	private Media contigFile;
117

    
118
	/** @see #getConsensusSequence() */
119
	@XmlElement(name = "ConsensusSequence")
120
    private SequenceString consensusSequence = SequenceString.NewInstance();
121

    
122
	@XmlAttribute(name = "isBarcode")
123
	private Boolean isBarcode = null;
124

    
125
	/** @see #getBarcodeSequencePart() */
126
	@XmlElement(name = "BarcodeSequencePart")
127
    private SequenceString barcodeSequencePart = SequenceString.NewInstance();
128

    
129
	/** @see #getGeneticAccessionNumber()*/
130
	@XmlElement(name = "GeneticAccessionNumber")
131
    @Column(length=20)
132
	private String geneticAccessionNumber;
133

    
134
	/** @see #getBoldProcessId() */
135
	@XmlElement(name = "BoldProcessId")
136
    @Column(length=20)
137
	private String boldProcessId;
138

    
139
    @XmlElementWrapper(name = "SingleReadAlignments")
140
    @XmlElement(name = "SingleReadAlignment")
141
    @XmlIDREF
142
    @XmlSchemaType(name = "IDREF")
143
    @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY, orphanRemoval=true)
144
    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
145
	private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
146

    
147
	/** @see #getDnaMarker() */
148
	@XmlElement(name = "DnaMarker")
149
    @XmlIDREF
150
    @XmlSchemaType(name = "IDREF")
151
    @ManyToOne(fetch = FetchType.LAZY)
152
	//no cascade as it is a defined term
153
	private DefinedTerm dnaMarker;
154

    
155

    
156
	/** @see #getHaplotype() */
157
	@XmlElement(name = "Haplotype")
158
    @Column(length=100)
159
	private String haplotype;
160

    
161
	/** @see #getCitations() */
162
	@XmlElementWrapper(name = "Citations")
163
    @XmlElement(name = "Citation")
164
    @XmlIDREF
165
    @XmlSchemaType(name = "IDREF")
166
    @ManyToMany(fetch = FetchType.LAZY)
167
    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
168
	private Set<Reference> citations = new HashSet<>();
169

    
170
//	//should be calculated in case sequence is set
171
//	@XmlElement (name = "DateSequenced", type= String.class)
172
//	@XmlJavaTypeAdapter(DateTimeAdapter.class)
173
//	@Type(type="dateTimeUserType")
174
//	@Basic(fetch = FetchType.LAZY)
175
//	private DateTime dateSequenced;
176

    
177

    
178
//*********************** FACTORY ****************************************************/
179

    
180
	public static Sequence NewInstance(String consensusSequence){
181
		Sequence result = new Sequence();
182
		result.setSequenceString(consensusSequence);
183
		return result;
184
	}
185

    
186

    
187
	public static Sequence NewInstance(String consensusSequence, Integer length){
188
		Sequence result = NewInstance(consensusSequence);
189
		result.getConsensusSequence().setLength(length);
190
		return result;
191
	}
192

    
193
	public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
194
		Sequence result = NewInstance(consensusSequence);
195
		result.getConsensusSequence().setLength(length);
196
		dnaSample.addSequence(result);
197

    
198
		return result;
199
	}
200
//*********************** CONSTRUCTOR ****************************************************/
201

    
202
	/**
	 * Non-public constructor. Within this class instances are created
	 * through the <code>NewInstance</code> factory methods.
	 */
	protected Sequence() {
		super();
	}
203

    
204
//*********************** GETTER / SETTER ****************************************************/
205

    
206

    
207
	/**
208
	 * The {@link DnaSample dna sample} this sequencing belongs too.
209
	 */
210
	public DnaSample getDnaSample() {
211
		return dnaSample;
212
	}
213

    
214
	/**
215
	 * To be called only from {@link DnaSample#addSequence(Sequence)}
216
	 * @see #getDnaSample()
217
	 */
218
	//TODO implement full bidirectionality
219
	protected void setDnaSample(DnaSample dnaSample) {
220
		this.dnaSample = dnaSample;
221
		if (dnaSample != null && !dnaSample.getSequences().contains(this)){
222
			throw new RuntimeException("Don't use DNA setter");
223
		}
224
	}
225

    
226
	/**
227
	 * The resulting consensus sequence represened by this {@link Sequence sequence} .
228
	 * The consensus is usually computed from the {@link SingleRead single reads}.
229
	 * The result of which is stored in a file called {@link #getContigFile() contig file}
230
	 *
231
	 * #see {@link #getContigFile()}
232
	 * #see {@link #getSingleReads()}
233
	 */
234
	public SequenceString getConsensusSequence() {
235
		return consensusSequence;
236
	}
237

    
238

    
239
	/**
240
	 * @see #getConsensusSequence()
241
	 */
242
	public void setConsensusSequence(SequenceString sequenceString) {
243
		if (sequenceString == null){
244
			sequenceString = SequenceString.NewInstance();
245
		}
246
		this.consensusSequence = sequenceString;
247
	}
248

    
249
	/**
250
	 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
251
	 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
252
	 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
253
	 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
254
	 * whether the sequence is a barcoding sequence or not.
255
	 *
256
	 * @see #getBarcodeSequencePart()
257
	 * @see #getSequenceString()
258
	 * @returns the isBarcode flag value (tri-state)
259
	 *
260
	 */
261
	public Boolean getIsBarcode() {
262
		return isBarcode;
263
	}
264

    
265
	/**
266
	 * @see #getIsBarcode()
267
	 * @see #getBarcodeSequencePart()
268
	 */
269
	public void setIsBarcode(Boolean isBarcode) {
270
		this.isBarcode = isBarcode;
271
	}
272

    
273
	/**
274
	 * If the barcode sequence string does not include 100% of the (consensus) sequence
275
	 * the part used as barcode is provided here. However, the barcode part
276
	 * should be kept empty if consensus sequence string and barcode sequence string are equal.
277
	 *
278
	 * @see #getIsBarcode()
279
	 */
280
	public SequenceString getBarcodeSequencePart() {
281
		return barcodeSequencePart;
282
	}
283

    
284
	/**
285
	 * @see #getBarcodeSequencePart()
286
	 */
287
	public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
288
		if (barcodeSequencePart == null){
289
			barcodeSequencePart = SequenceString.NewInstance();
290
		}
291
		this.barcodeSequencePart = barcodeSequencePart;
292
	}
293

    
294
	/**
295
	 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
296
	 * The marker should usually be similar to the one used in the according {@link Amplification
297
	 * amplification process}. However, it may slightly differ, or, if multiple amplifications where
298
	 * used to build this consensus sequence it may be the super set of the markers used in amplification.
299
	 *
300
	 * @return
301
	 */
302
	public DefinedTerm getDnaMarker(){
303
		return this.dnaMarker;
304
	}
305

    
306
	/**
307
	 * @see #getDnaMarker()
308
	 * @param marker
309
	 */
310
	public void setDnaMarker(DefinedTerm dnaMarker){
311
		this.dnaMarker = dnaMarker;
312
	}
313

    
314
	/**
315
	 * The accession number used in GenBank, EMBL and DDBJ.
316
	 * @return
317
	 */
318
	public String getGeneticAccessionNumber() {
319
		return geneticAccessionNumber;
320
	}
321

    
322
	/**
323
	 * Sets the genetic accession number.
324
	 * @see #getGeneticAccessionNumber()
325
	 */
326
	public void setGeneticAccessionNumber(String geneticAccessionNumber) {
327
		this.geneticAccessionNumber = geneticAccessionNumber;
328
	}
329

    
330

    
331
	/**
332
	 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
333
	 */
334
	public String getBoldProcessId() {
335
		return boldProcessId;
336
	}
337

    
338
	public void setBoldProcessId(String boldProcessId) {
339
		this.boldProcessId = boldProcessId;
340
	}
341

    
342
	/**
343
	 * Returns the name of the haplotype.
344
	 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
345
	 * A certain haplotype may be specific for an individual, a population or a species.
346
	 * @return
347
	 */
348
	public String getHaplotype() {
349
		return haplotype;
350
	}
351

    
352
	/**
353
	 * @see #getHaplotype()
354
	 */
355
	public void setHaplotype(String haplotype) {
356
		this.haplotype = haplotype;
357
	}
358

    
359
	/**
360
	 * The contigFile containing all data and data processing for this sequencing.
361
	 *
362
	 * @see #getConsensusSequence()
363
	 * @see #getSingleReads()
364
	 */
365
	public Media getContigFile() {
366
		return contigFile;
367
	}
368

    
369
	/**
370
	 * @see #getContigFile()
371
	 */
372
	public void setContigFile(Media contigFile) {
373
		this.contigFile = contigFile;
374
	}
375

    
376

    
377
	/**
378
	 * Citations are the set of references in which this sequence was published.
379
	 * Unlike taxonomic names the first publication of a sequence
380
	 * is not so important (maybe because it is required by publishers
381
	 * that they are all registered at Genbank) therefore we do not have something like an
382
	 * "original reference" attribute.<BR>
383
	 * Links to these references are to be stored within the reference itself.
384
	 * @return the set of references in which this sequence was published.
385
	 */
386
	public Set<Reference> getCitations() {
387
		return citations;
388
	}
389
	/**
390
	 * @see #getCitations()
391
	 */
392
	protected void setCitations(Set<Reference> citations) {
393
		this.citations = citations;
394
	}
395
	/**
396
	 * @see #getCitations()
397
	 */
398
	public void addCitation(Reference citation) {
399
		this.citations.add(citation);
400
	}
401
	/**
402
	 * @see #getCitations()
403
	 */
404
	public void removeCitation(Reference citation) {
405
		this.citations.remove(citation);
406
	}
407

    
408
	/**
409
	 * The {@link SingleRead single reads} that were used to build this consensus sequence.
410
	 *
411
	 * @see #getConsensusSequence()
412
	 * @see #getContigFile()
413
	 */
414
	public Set<SingleReadAlignment> getSingleReadAlignments() {
415
		return singleReadAlignments;
416
	}
417
	/**
418
	 * @see #getSingleReads()
419
	 */
420
	public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
421
		this.singleReadAlignments.add(singleReadAlignment);
422
		if (! this.equals(singleReadAlignment.getConsensusSequence())){
423
			singleReadAlignment.setConsensusAlignment(this);
424
		};
425
	}
426
	/**
427
	 * @see #getSingleReads()
428
	 */
429
	public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
430
		this.singleReadAlignments.remove(singleReadAlignment);
431
		if (this.equals(singleReadAlignment.getConsensusSequence())){
432
			singleReadAlignment.setConsensusAlignment(null);
433
			singleReadAlignment.setSingleRead(null);
434
		}
435
	}
436
//	/**
437
//	 * @see #getSingleReads()
438
//	 */
439
//	//TODO private as long it is unclear how bidirectionality is handled
440
//	@SuppressWarnings("unused")
441
//	private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
442
//		this.singleReadAlignments = singleReadAlignments;
443
//	}
444

    
445
// *********************** CONVENIENCE ***********************************/
446

    
447
	/**
448
	 * Convenience method to add a single read to a consensus sequence
449
	 * by creating a {@link SingleReadAlignment}.
450
	 * @param singleRead the {@link SingleRead} to add
451
	 * @return the created SingleReadAlignment
452
	 */
453
	public SingleReadAlignment addSingleRead(SingleRead singleRead) {
454
		SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead);
455
		return alignment;
456
	}
457

    
458
	public void removeSingleRead(SingleRead singleRead) {
459
		Set<SingleReadAlignment> toRemove = new HashSet<SingleReadAlignment>();
460
		for (SingleReadAlignment align : this.singleReadAlignments){
461
			if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){
462
				toRemove.add(align);
463
			}
464
		}
465
		for (SingleReadAlignment align : toRemove){
466
			removeSingleReadAlignment(align);
467
		}
468
		return;
469
	}
470

    
471
	/**
472
	 * Convenience method that returns all single reads this consensus sequence
473
	 * is based on via {@link SingleReadAlignment}s.
474
	 * @return set of related single reads
475
	 */
476
	@XmlTransient
477
	@Transient
478
	public Set<SingleRead> getSingleReads(){
479
		Set<SingleRead> singleReads = new HashSet<SingleRead>();
480
		for (SingleReadAlignment align : this.singleReadAlignments){
481
			if (align.getSingleRead() != null){  // == null should not happen
482
				singleReads.add(align.getSingleRead());
483
			}
484
		}
485
		return singleReads;
486
	}
487

    
488

    
489
	//*************************** Transient GETTER /SETTER *****************************/
490

    
491
	/**
492
	 * Delegate method to get the text representation of the consensus sequence
493
	 * @see #setSequenceString(String)
494
	 */
495
	@Transient
496
	public String getSequenceString() {
497
		return consensusSequence.getString();
498
	}
499

    
500
	/**
501
	 * Delegate method to set the text representation of the {@link #getConsensusSequence()
502
	 * consensus sequence}.
503
	 */
504
	@Transient
505
	public void setSequenceString(String sequence) {
506
		consensusSequence.setString(sequence);
507
	}
508

    
509
	/**
510
	 * Convenience method which computes the set of all related pherograms
511
	 * @return the set of pherograms.
512
	 */
513
	@Transient
514
	public Set<Media> getPherograms(){
515
		Set<Media> result = new HashSet<Media>();
516
		for (SingleReadAlignment singleReadAlign : singleReadAlignments){
517
			if (singleReadAlign.getSingleRead() != null &&  singleReadAlign.getSingleRead().getPherogram() != null){
518
				result.add(singleReadAlign.getSingleRead().getPherogram());
519
			}
520
		}
521
		return result;
522
	}
523

    
524

    
525
	//***** Registrations ************/
526
	/**
527
	 * Returns the computed genBank uri.
528
	 * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
529
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
530
	 */
531
	@Transient
532
	public URI getGenBankUri() throws URISyntaxException {
533
		return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
534
	}
535

    
536
	/**
537
	 * Returns the computed EMBL uri.
538
	 * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
539
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
540
	 */
541
	@Transient
542
	public URI getEmblUri() throws URISyntaxException {
543
		return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
544
	}
545

    
546
	/**
547
	 * Returns the computed DDBJ uri.
548
	 * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
549
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
550
	 */
551
	@Transient
552
	public URI getDdbjUri() throws URISyntaxException {
553
		return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
554
	}
555

    
556
	/**
557
	 * Returns the URI for the BOLD entry.
558
	 * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
559
	 * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
560
	 * @see #getBoldProcessId()
561
	 */
562
	@Transient
563
	public URI getBoldUri() throws URISyntaxException {
564
		return createExternalUri(BOLD_BASE_URI, boldProcessId);
565
	}
566

    
567
	private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
568
		if (CdmUtils.isNotBlank(id)){
569
			return new URI(String.format(baseUri, id.trim()));
570
		}else{
571
			return null;
572
		}
573
	}
574

    
575

    
576

    
577

    
578
	//*********************** CLONE ********************************************************/
579
	/**
580
	 * Clones <i>this</i> sequence. This is a shortcut that enables to create
581
	 * a new instance that differs only slightly from <i>this</i> sequencing by
582
	 * modifying only some of the attributes.<BR><BR>
583
	 *
584
	 *
585
	 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
586
	 * @see java.lang.Object#clone()
587
	 */
588
	@Override
589
	public Object clone()  {
590
		try{
591
		Sequence result = (Sequence)super.clone();
592

    
593
		//sequences
594
		result.consensusSequence = (SequenceString)this.consensusSequence.clone();
595
		result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
596

    
597

    
598
		//single sequences
599
		result.singleReadAlignments = new HashSet<SingleReadAlignment>();
600
		for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
601
			SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
602
			result.singleReadAlignments.add(newAlignment);
603
		}
604

    
605
		//citations  //TODO do we really want to copy these ??
606
		result.citations = new HashSet<Reference>();
607
		for (Reference ref: this.citations){
608
			result.citations.add(ref);
609
		}
610

    
611

    
612

    
613
		return result;
614
		}catch (CloneNotSupportedException e) {
615
			logger.warn("Object does not implement cloneable");
616
			e.printStackTrace();
617
			return null;
618
		}
619
	}
620

    
621

    
622
}
(8-8/14)