Project

General

Profile

Download (18.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.model.molecular;
10

    
11

    
12
import java.net.URI;
13
import java.net.URISyntaxException;
14
import java.util.HashSet;
15
import java.util.Set;
16

    
17
import javax.persistence.Column;
18
import javax.persistence.Entity;
19
import javax.persistence.FetchType;
20
import javax.persistence.ManyToMany;
21
import javax.persistence.ManyToOne;
22
import javax.persistence.OneToMany;
23
import javax.persistence.Transient;
24
import javax.xml.bind.annotation.XmlAccessType;
25
import javax.xml.bind.annotation.XmlAccessorType;
26
import javax.xml.bind.annotation.XmlAttribute;
27
import javax.xml.bind.annotation.XmlElement;
28
import javax.xml.bind.annotation.XmlElementWrapper;
29
import javax.xml.bind.annotation.XmlIDREF;
30
import javax.xml.bind.annotation.XmlRootElement;
31
import javax.xml.bind.annotation.XmlSchemaType;
32
import javax.xml.bind.annotation.XmlTransient;
33
import javax.xml.bind.annotation.XmlType;
34

    
35
import org.apache.log4j.Logger;
36
import org.hibernate.annotations.Cascade;
37
import org.hibernate.annotations.CascadeType;
38
import org.hibernate.envers.Audited;
39
import org.hibernate.search.annotations.IndexedEmbedded;
40
import org.springframework.beans.factory.annotation.Configurable;
41

    
42
import eu.etaxonomy.cdm.common.CdmUtils;
43
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
44
import eu.etaxonomy.cdm.model.common.DefinedTerm;
45
import eu.etaxonomy.cdm.model.common.TermType;
46
import eu.etaxonomy.cdm.model.media.Media;
47
import eu.etaxonomy.cdm.model.reference.Reference;
48

    
49
/**
50
 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
51
 * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample},
52
 * while
53
 *
54
 * <BR>This class holds information about both the combining process of
55
 * {@link SingleRead single sequences} to one consensus sequence
56
 * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
57
 * as well as sequence related information.
58
 * The later includes the {@link #getConsensusSequence() sequence string} itself,
59
 * important genetic information about the DNA that has been sequenced
60
 * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as
61
 * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
62
 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
63
 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
64
 *
65
 * @author m.doering
66
 * @since 08-Nov-2007 13:06:51
67
 * @author a.mueller
68
 * @updated 11-Jul-2013
69
 */
70
@XmlAccessorType(XmlAccessType.FIELD)
71
@XmlType(name = "Sequence", propOrder = {
72
    "dnaSample",
73
	"consensusSequence",
74
	"isBarcode",
75
    "barcodeSequencePart",
76
    "dnaMarker",
77
    "geneticAccessionNumber",
78
    "boldProcessId",
79
    "haplotype",
80
    "contigFile",
81
    "singleReadAlignments",
82
    "citations"
83
})
84
@XmlRootElement(name = "Sequencing")
85
@Entity
86
@Audited
87
@Configurable
88
//@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
89
public class Sequence extends AnnotatableEntity implements Cloneable{
90
	private static final long serialVersionUID = 8298983152731241775L;
91
	private static final Logger logger = Logger.getLogger(Sequence.class);
92

    
93
	//TODO move to cdmlib-ext?
94
	private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
95
	private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
96
	private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
97
	private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
98

    
99
    @XmlElement( name = "DnaSample")
100
    @XmlIDREF
101
    @XmlSchemaType(name = "IDREF")
102
    @ManyToOne(fetch = FetchType.LAZY)
103
    @IndexedEmbedded
104
    private DnaSample dnaSample;
105

    
106

    
107
	/** @see #getContigFile() */
108
	@XmlElement(name = "ContigFile")
109
    @XmlIDREF
110
    @XmlSchemaType(name = "IDREF")
111
    @ManyToOne(fetch = FetchType.LAZY)
112
	@Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
113
	private Media contigFile;
114

    
115
	/** @see #getConsensusSequence() */
116
	@XmlElement(name = "ConsensusSequence")
117
    private SequenceString consensusSequence = SequenceString.NewInstance();
118

    
119
	@XmlAttribute(name = "isBarcode")
120
	private Boolean isBarcode = null;
121

    
122
	/** @see #getBarcodeSequence()*/
123
	@XmlElement(name = "BarcodeSequencePart")
124
    private SequenceString barcodeSequencePart = SequenceString.NewInstance();
125

    
126
	/** @see #getGeneticAccessionNumber()*/
127
	@XmlElement(name = "GeneticAccessionNumber")
128
    @Column(length=20)
129
	private String geneticAccessionNumber;
130

    
131
	/** @see #getBoldProcessId() */
132
	@XmlElement(name = "BoldProcessId")
133
    @Column(length=20)
134
	private String boldProcessId;
135

    
136
    @XmlElementWrapper(name = "SingleReadAlignments")
137
    @XmlElement(name = "SingleReadAlignment")
138
    @XmlIDREF
139
    @XmlSchemaType(name = "IDREF")
140
    @OneToMany(mappedBy="consensusAlignment", fetch = FetchType.LAZY, orphanRemoval=true)
141
    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
142
	private Set<SingleReadAlignment> singleReadAlignments = new HashSet<SingleReadAlignment>();
143

    
144
	/** @see #getDnaMarker() */
145
	@XmlElement(name = "DnaMarker")
146
    @XmlIDREF
147
    @XmlSchemaType(name = "IDREF")
148
    @ManyToOne(fetch = FetchType.LAZY)
149
	//no cascade as it is a defined term
150
	private DefinedTerm dnaMarker;
151

    
152

    
153
	/** @see #getHaplotype() */
154
	@XmlElement(name = "Haplotype")
155
    @Column(length=100)
156
	private String haplotype;
157

    
158
	/** @see #getCitations() */
159
	@XmlElementWrapper(name = "Citations")
160
    @XmlElement(name = "Citation")
161
    @XmlIDREF
162
    @XmlSchemaType(name = "IDREF")
163
    @ManyToMany(fetch = FetchType.LAZY)
164
    @Cascade({CascadeType.SAVE_UPDATE,CascadeType.MERGE})
165
	private Set<Reference> citations = new HashSet<>();
166

    
167
//	//should be calculated in case sequence is set
168
//	@XmlElement (name = "DateSequenced", type= String.class)
169
//	@XmlJavaTypeAdapter(DateTimeAdapter.class)
170
//	@Type(type="dateTimeUserType")
171
//	@Basic(fetch = FetchType.LAZY)
172
//	private DateTime dateSequenced;
173

    
174

    
175
//*********************** FACTORY ****************************************************/
176

    
177
	public static Sequence NewInstance(String consensusSequence){
178
		Sequence result = new Sequence();
179
		result.setSequenceString(consensusSequence);
180
		return result;
181
	}
182

    
183

    
184
	public static Sequence NewInstance(String consensusSequence, Integer length){
185
		Sequence result = NewInstance(consensusSequence);
186
		result.getConsensusSequence().setLength(length);
187
		return result;
188
	}
189

    
190
	public static Sequence NewInstance(DnaSample dnaSample, String consensusSequence, Integer length){
191
		Sequence result = NewInstance(consensusSequence);
192
		result.getConsensusSequence().setLength(length);
193
		dnaSample.addSequence(result);
194

    
195
		return result;
196
	}
197
//*********************** CONSTRUCTOR ****************************************************/
198

    
199
	protected Sequence() {}
200

    
201
//*********************** GETTER / SETTER ****************************************************/
202

    
203

    
204
	/**
205
	 * The {@link DnaSample dna sample} this sequencing belongs too.
206
	 */
207
	public DnaSample getDnaSample() {
208
		return dnaSample;
209
	}
210

    
211
	/**
212
	 * To be called only from {@link DnaSample#addSequence(Sequence)}
213
	 * @see #getDnaSample()
214
	 */
215
	//TODO implement full bidirectionality
216
	protected void setDnaSample(DnaSample dnaSample) {
217
		this.dnaSample = dnaSample;
218
		if (dnaSample != null && !dnaSample.getSequences().contains(this)){
219
			throw new RuntimeException("Don't use DNA setter");
220
		}
221
	}
222

    
223
	/**
224
	 * The resulting consensus sequence represened by this {@link Sequence sequence} .
225
	 * The consensus is usually computed from the {@link SingleRead single reads}.
226
	 * The result of which is stored in a file called {@link #getContigFile() contig file}
227
	 *
228
	 * #see {@link #getContigFile()}
229
	 * #see {@link #getSingleReads()}
230
	 */
231
	public SequenceString getConsensusSequence() {
232
		return consensusSequence;
233
	}
234

    
235

    
236
	/**
237
	 * @see #getConsensusSequence()
238
	 */
239
	public void setConsensusSequence(SequenceString sequenceString) {
240
		if (sequenceString == null){
241
			sequenceString = SequenceString.NewInstance();
242
		}
243
		this.consensusSequence = sequenceString;
244
	}
245

    
246
	/**
247
	 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
248
	 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
249
	 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
250
	 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
251
	 * whether the sequence is a barcoding sequence or not.
252
	 *
253
	 * @see #getBarcodeSequencePart()
254
	 * @see #getSequenceString()
255
	 * @returns the isBarcode flag value (tri-state)
256
	 *
257
	 */
258
	public Boolean getIsBarcode() {
259
		return isBarcode;
260
	}
261

    
262
	/**
263
	 * @see #getIsBarcode()
264
	 * @see #getBarcodeSequencePart()
265
	 */
266
	public void setIsBarcode(Boolean isBarcode) {
267
		this.isBarcode = isBarcode;
268
	}
269

    
270
	/**
271
	 * If the barcode sequence string does not include 100% of the (consensus) sequence
272
	 * the part used as barcode is provided here. However, the barcode part
273
	 * should be kept empty if consensus sequence string and barcode sequence string are equal.
274
	 *
275
	 * @see #getIsBarcode()
276
	 */
277
	public SequenceString getBarcodeSequencePart() {
278
		return barcodeSequencePart;
279
	}
280

    
281
	/**
282
	 * @see #getBarcodeSequencePart()
283
	 */
284
	public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
285
		if (barcodeSequencePart == null){
286
			barcodeSequencePart = SequenceString.NewInstance();
287
		}
288
		this.barcodeSequencePart = barcodeSequencePart;
289
	}
290

    
291
	/**
292
	 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
293
	 * The marker should usually be similar to the one used in the according {@link Amplification
294
	 * amplification process}. However, it may slightly differ, or, if multiple amplifications where
295
	 * used to build this consensus sequence it may be the super set of the markers used in amplification.
296
	 *
297
	 * @return
298
	 */
299
	public DefinedTerm getDnaMarker(){
300
		return this.dnaMarker;
301
	}
302

    
303
	/**
304
	 * @see #getDnaMarker()
305
	 * @param marker
306
	 */
307
	public void setDnaMarker(DefinedTerm dnaMarker){
308
		this.dnaMarker = dnaMarker;
309
	}
310

    
311
	/**
312
	 * The accession number used in GenBank, EMBL and DDBJ.
313
	 * @return
314
	 */
315
	public String getGeneticAccessionNumber() {
316
		return geneticAccessionNumber;
317
	}
318

    
319
	/**
320
	 * Sets the genetic accession number.
321
	 * @see #getGeneticAccessionNumber()
322
	 */
323
	public void setGeneticAccessionNumber(String geneticAccessionNumber) {
324
		this.geneticAccessionNumber = geneticAccessionNumber;
325
	}
326

    
327

    
328
	/**
329
	 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
330
	 */
331
	public String getBoldProcessId() {
332
		return boldProcessId;
333
	}
334

    
335
	public void setBoldProcessId(String boldProcessId) {
336
		this.boldProcessId = boldProcessId;
337
	}
338

    
339
	/**
340
	 * Returns the name of the haplotype.
341
	 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
342
	 * A certain haplotype may be specific for an individual, a population or a species.
343
	 * @return
344
	 */
345
	public String getHaplotype() {
346
		return haplotype;
347
	}
348

    
349
	/**
350
	 * @see #getHaplotype()
351
	 */
352
	public void setHaplotype(String haplotype) {
353
		this.haplotype = haplotype;
354
	}
355

    
356
	/**
357
	 * The contigFile containing all data and data processing for this sequencing.
358
	 *
359
	 * @see #getConsensusSequence()
360
	 * @see #getSingleReads()
361
	 */
362
	public Media getContigFile() {
363
		return contigFile;
364
	}
365

    
366
	/**
367
	 * @see #getContigFile()
368
	 */
369
	public void setContigFile(Media contigFile) {
370
		this.contigFile = contigFile;
371
	}
372

    
373

    
374
	/**
375
	 * Citations are the set of references in which this sequence was published.
376
	 * Unlike taxonomic names the first publication of a sequence
377
	 * is not so important (maybe because it is required by publishers
378
	 * that they are all registered at Genbank) therefore we do not have something like an
379
	 * "original reference" attribute.<BR>
380
	 * Links to these references are to be stored within the reference itself.
381
	 * @return the set of references in which this sequence was published.
382
	 */
383
	public Set<Reference> getCitations() {
384
		return citations;
385
	}
386
	/**
387
	 * @see #getCitations()
388
	 */
389
	protected void setCitations(Set<Reference> citations) {
390
		this.citations = citations;
391
	}
392
	/**
393
	 * @see #getCitations()
394
	 */
395
	public void addCitation(Reference citation) {
396
		this.citations.add(citation);
397
	}
398
	/**
399
	 * @see #getCitations()
400
	 */
401
	public void removeCitation(Reference citation) {
402
		this.citations.remove(citation);
403
	}
404

    
405
	/**
406
	 * The {@link SingleRead single reads} that were used to build this consensus sequence.
407
	 *
408
	 * @see #getConsensusSequence()
409
	 * @see #getContigFile()
410
	 */
411
	public Set<SingleReadAlignment> getSingleReadAlignments() {
412
		return singleReadAlignments;
413
	}
414
	/**
415
	 * @see #getSingleReads()
416
	 */
417
	public void addSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
418
		this.singleReadAlignments.add(singleReadAlignment);
419
		if (! this.equals(singleReadAlignment.getConsensusSequence())){
420
			singleReadAlignment.setConsensusAlignment(this);
421
		};
422
	}
423
	/**
424
	 * @see #getSingleReads()
425
	 */
426
	public void removeSingleReadAlignment(SingleReadAlignment singleReadAlignment) {
427
		this.singleReadAlignments.remove(singleReadAlignment);
428
		if (this.equals(singleReadAlignment.getConsensusSequence())){
429
			singleReadAlignment.setConsensusAlignment(null);
430
			singleReadAlignment.setSingleRead(null);
431
		}
432
	}
433
//	/**
434
//	 * @see #getSingleReads()
435
//	 */
436
//	//TODO private as long it is unclear how bidirectionality is handled
437
//	@SuppressWarnings("unused")
438
//	private void setSingleReadAlignments(Set<SingleReadAlignment> singleReadAlignments) {
439
//		this.singleReadAlignments = singleReadAlignments;
440
//	}
441

    
442
// *********************** CONVENIENCE ***********************************/
443

    
444
	/**
445
	 * Convenience method to add a single read to a consensus sequence
446
	 * by creating a {@link SingleReadAlignment}.
447
	 * @param singleRead the {@link SingleRead} to add
448
	 * @return the created SingleReadAlignment
449
	 */
450
	public SingleReadAlignment addSingleRead(SingleRead singleRead) {
451
		SingleReadAlignment alignment = SingleReadAlignment.NewInstance(this, singleRead);
452
		return alignment;
453
	}
454

    
455
	public void removeSingleRead(SingleRead singleRead) {
456
		Set<SingleReadAlignment> toRemove = new HashSet<SingleReadAlignment>();
457
		for (SingleReadAlignment align : this.singleReadAlignments){
458
			if (align.getSingleRead() != null && align.getSingleRead().equals(singleRead)){
459
				toRemove.add(align);
460
			}
461
		}
462
		for (SingleReadAlignment align : toRemove){
463
			removeSingleReadAlignment(align);
464
		}
465
		return;
466
	}
467

    
468
	/**
469
	 * Convenience method that returns all single reads this consensus sequence
470
	 * is based on via {@link SingleReadAlignment}s.
471
	 * @return set of related single reads
472
	 */
473
	@XmlTransient
474
	@Transient
475
	public Set<SingleRead> getSingleReads(){
476
		Set<SingleRead> singleReads = new HashSet<SingleRead>();
477
		for (SingleReadAlignment align : this.singleReadAlignments){
478
			if (align.getSingleRead() != null){  // == null should not happen
479
				singleReads.add(align.getSingleRead());
480
			}
481
		}
482
		return singleReads;
483
	}
484

    
485

    
486
	//*************************** Transient GETTER /SETTER *****************************/
487

    
488
	/**
489
	 * Delegate method to get the text representation of the consensus sequence
490
	 * @see #setSequenceString(String)
491
	 */
492
	@Transient
493
	public String getSequenceString() {
494
		return consensusSequence.getString();
495
	}
496

    
497
	/**
498
	 * Delegate method to set the text representation of the {@link #getConsensusSequence()
499
	 * consensus sequence}.
500
	 */
501
	@Transient
502
	public void setSequenceString(String sequence) {
503
		consensusSequence.setString(sequence);
504
	}
505

    
506
	/**
507
	 * Convenience method which computes the set of all related pherograms
508
	 * @return the set of pherograms.
509
	 */
510
	@Transient
511
	public Set<Media> getPherograms(){
512
		Set<Media> result = new HashSet<Media>();
513
		for (SingleReadAlignment singleReadAlign : singleReadAlignments){
514
			if (singleReadAlign.getSingleRead() != null &&  singleReadAlign.getSingleRead().getPherogram() != null){
515
				result.add(singleReadAlign.getSingleRead().getPherogram());
516
			}
517
		}
518
		return result;
519
	}
520

    
521

    
522
	//***** Registrations ************/
523
	/**
524
	 * Returns the computed genBank uri.
525
	 * @return the uri composed of {@link #GENBANK_BASE_URI} and {@link #geneticAccessionNumber}
526
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
527
	 */
528
	@Transient
529
	public URI getGenBankUri() throws URISyntaxException {
530
		return createExternalUri(GENBANK_BASE_URI, geneticAccessionNumber);
531
	}
532

    
533
	/**
534
	 * Returns the computed EMBL uri.
535
	 * @return the uri composed of {@link #EMBL_BASE_URI} and {@link #geneticAccessionNumber}
536
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
537
	 */
538
	@Transient
539
	public URI getEmblUri() throws URISyntaxException {
540
		return createExternalUri(EMBL_BASE_URI, geneticAccessionNumber);
541
	}
542

    
543
	/**
544
	 * Returns the computed DDBJ uri.
545
	 * @return the uri composed of {@link #DDBJ_BASE_URI} and {@link #geneticAccessionNumber}
546
	 * @throws URISyntaxException when URI could not be created with {@link #geneticAccessionNumber}
547
	 */
548
	@Transient
549
	public URI getDdbjUri() throws URISyntaxException {
550
		return createExternalUri(DDBJ_BASE_URI, geneticAccessionNumber);
551
	}
552

    
553
	/**
554
	 * Returns the URI for the BOLD entry.
555
	 * @return the uri composed of {@link #BOLD_BASE_URI} and {@link #boldProcessId}
556
	 * @throws URISyntaxException when URI could not be created with {@link #boldProcessId}
557
	 * @see #getBoldProcessId()
558
	 */
559
	@Transient
560
	public URI getBoldUri() throws URISyntaxException {
561
		return createExternalUri(BOLD_BASE_URI, boldProcessId);
562
	}
563

    
564
	private URI createExternalUri(String baseUri, String id) throws URISyntaxException{
565
		if (CdmUtils.isNotBlank(id)){
566
			return new URI(String.format(baseUri, id.trim()));
567
		}else{
568
			return null;
569
		}
570
	}
571

    
572

    
573

    
574

    
575
	//*********************** CLONE ********************************************************/
576
	/**
577
	 * Clones <i>this</i> sequence. This is a shortcut that enables to create
578
	 * a new instance that differs only slightly from <i>this</i> sequencing by
579
	 * modifying only some of the attributes.<BR><BR>
580
	 *
581
	 *
582
	 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
583
	 * @see java.lang.Object#clone()
584
	 */
585
	@Override
586
	public Object clone()  {
587
		try{
588
		Sequence result = (Sequence)super.clone();
589

    
590
		//sequences
591
		result.consensusSequence = (SequenceString)this.consensusSequence.clone();
592
		result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
593

    
594

    
595
		//single sequences
596
		result.singleReadAlignments = new HashSet<SingleReadAlignment>();
597
		for (SingleReadAlignment singleReadAlign: this.singleReadAlignments){
598
			SingleReadAlignment newAlignment = (SingleReadAlignment)singleReadAlign.clone();
599
			result.singleReadAlignments.add(newAlignment);
600
		}
601

    
602
		//citations  //TODO do we really want to copy these ??
603
		result.citations = new HashSet<Reference>();
604
		for (Reference ref: this.citations){
605
			result.citations.add(ref);
606
		}
607

    
608

    
609

    
610
		return result;
611
		}catch (CloneNotSupportedException e) {
612
			logger.warn("Object does not implement cloneable");
613
			e.printStackTrace();
614
			return null;
615
		}
616
	}
617

    
618

    
619
}
(8-8/14)