add "part" to sequence setter
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.model.molecular;
10
11
12 import java.net.URI;
13 import java.util.HashSet;
14 import java.util.Set;
15
16 import javax.persistence.Entity;
17 import javax.persistence.FetchType;
18 import javax.persistence.ManyToMany;
19 import javax.persistence.ManyToOne;
20 import javax.persistence.Transient;
21 import javax.validation.constraints.Size;
22 import javax.xml.bind.annotation.XmlAccessType;
23 import javax.xml.bind.annotation.XmlAccessorType;
24 import javax.xml.bind.annotation.XmlAttribute;
25 import javax.xml.bind.annotation.XmlElement;
26 import javax.xml.bind.annotation.XmlElementWrapper;
27 import javax.xml.bind.annotation.XmlIDREF;
28 import javax.xml.bind.annotation.XmlRootElement;
29 import javax.xml.bind.annotation.XmlSchemaType;
30 import javax.xml.bind.annotation.XmlType;
31
32 import org.apache.log4j.Logger;
33 import org.codehaus.plexus.util.StringUtils;
34 import org.hibernate.annotations.Cascade;
35 import org.hibernate.annotations.CascadeType;
36 import org.hibernate.envers.Audited;
37 import org.hibernate.search.annotations.IndexedEmbedded;
38 import org.springframework.beans.factory.annotation.Configurable;
39
40 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
41 import eu.etaxonomy.cdm.model.common.DefinedTerm;
42 import eu.etaxonomy.cdm.model.common.TermType;
43 import eu.etaxonomy.cdm.model.media.Media;
44 import eu.etaxonomy.cdm.model.reference.Reference;
45
46 /**
47 * Alignment of multiple {@link SingleRead single sequences} to a consensus sequence.
48 * This sequence is a part of (or the complete) DNA sequences of the related {@link DnaSample DNA Sample},
49 * while
50 *
51 * <BR>This class holds information about both the combining process of
52 * {@link SingleRead single sequences} to one consensus sequence
53 * ({@link #getSingleReads() singleReads} , {@link #getContigFile() contigFile} )
54 * as well as sequence related information.
55 * The later includes the {@link #getConsensusSequence() sequence string} itself,
56 * important genetic information about the DNA that has been sequenced
57 * ({@link #getDnaMarker() marker} , {@link #getHaplotype()} haplotype) as well as
58 * registration information ({@link #getGeneticAccessionNumber() genetic accession number} ),
59 * citations, and barcoding information ({@link #getBoldProcessId() BOLD-id},
60 * {@link #getBarcodeSequencePart() barcode sequence}, ...).
61 *
62 * @author m.doering
63 * @created 08-Nov-2007 13:06:51
64 * @author a.mueller
65 * @updated 11-Jul-2013
66 */
67 @XmlAccessorType(XmlAccessType.FIELD)
68 @XmlType(name = "Sequence", propOrder = {
69 "dnaSample",
70 "consensusSequence",
71 "isBarcode",
72 "barcodeSequencePart",
73 "dnaMarker",
74 "geneticAccessionNumber",
75 "boldProcessId",
76 "haplotype",
77 "contigFile",
78 "singleReads",
79 "citations"
80 })
81 @XmlRootElement(name = "Sequencing")
82 @Entity
83 @Audited
84 @Configurable
85 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
86 public class Sequence extends AnnotatableEntity implements Cloneable{
87 private static final long serialVersionUID = 8298983152731241775L;
88 private static final Logger logger = Logger.getLogger(Sequence.class);
89
90 //TODO move to cdmlib-ext?
91 private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
92 private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
93 private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
94 private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
95
96 @XmlElement( name = "DnaSample")
97 @XmlIDREF
98 @XmlSchemaType(name = "IDREF")
99 @ManyToOne(fetch = FetchType.LAZY)
100 @IndexedEmbedded
101 private DnaSample dnaSample;
102
103
104 /** @see #getContigFile() */
105 @XmlElement(name = "ContigFile")
106 @XmlIDREF
107 @XmlSchemaType(name = "IDREF")
108 @ManyToOne(fetch = FetchType.LAZY)
109 private Media contigFile;
110
111 /** @see #getConsensusSequence() */
112 @XmlElement(name = "ConsensusSequence")
113 private SequenceString consensusSequence = SequenceString.NewInstance();
114
115 @XmlAttribute(name = "isBarcode")
116 private Boolean isBarcode = null;
117
118 /** @see #getBarcodeSequence()*/
119 @XmlElement(name = "BarcodeSequencePart")
120 private SequenceString barcodeSequencePart = SequenceString.NewInstance();
121
122 /** @see #getGeneticAccessionNumber()*/
123 @XmlElement(name = "GeneticAccessionNumber")
124 @Size(max=20)
125 private String geneticAccessionNumber;
126
127 /** @see #getBoldProcessId() */
128 @XmlElement(name = "BoldProcessId")
129 @Size(max=20)
130 private String boldProcessId;
131
132 @XmlElementWrapper(name = "SingleReads")
133 @XmlElement(name = "SingleRead")
134 @XmlIDREF
135 @XmlSchemaType(name = "IDREF")
136 @ManyToMany(fetch = FetchType.LAZY)
137 @Cascade({CascadeType.SAVE_UPDATE})
138 private Set<SingleRead> singleReads = new HashSet<SingleRead>();
139
140 /** @see #getDnaMarker() */
141 @XmlElement(name = "DnaMarker")
142 @XmlIDREF
143 @XmlSchemaType(name = "IDREF")
144 @ManyToOne(fetch = FetchType.LAZY)
145 //no cascade as it is a defined term
146 private DefinedTerm dnaMarker;
147
148
149 /** @see #getHaplotype() */
150 @XmlElement(name = "Haplotype")
151 @Size(max=100)
152 private String haplotype;
153
154 /** @see #getCitations() */
155 @XmlElementWrapper(name = "Citations")
156 @XmlElement(name = "Citation")
157 @XmlIDREF
158 @XmlSchemaType(name = "IDREF")
159 @ManyToMany(fetch = FetchType.LAZY)
160 @Cascade({CascadeType.SAVE_UPDATE})
161 private Set<Reference> citations = new HashSet<Reference>();
162
163 // //should be calculated in case sequence is set
164 // @XmlElement (name = "DateSequenced", type= String.class)
165 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
166 // @Type(type="dateTimeUserType")
167 // @Basic(fetch = FetchType.LAZY)
168 // private DateTime dateSequenced;
169
170
171 //*********************** FACTORY ****************************************************/
172
173 public static Sequence NewInstance(String consensusSequence){
174 Sequence result = new Sequence();
175 result.setSequenceString(consensusSequence);
176 return result;
177 }
178
179
180 public static Sequence NewInstance(String consensusSequence, Integer length){
181 Sequence result = NewInstance(consensusSequence);
182 result.getConsensusSequence().setLength(length);
183 return result;
184 }
185 //*********************** CONSTRUCTOR ****************************************************/
186
187 protected Sequence() {}
188
189 //*********************** GETTER / SETTER ****************************************************/
190
191
192 /**
193 * The {@link DnaSample dna sample} this sequencing belongs too.
194 */
195 public DnaSample getDnaSample() {
196 return dnaSample;
197 }
198
199 //TODO bidirectionality??
200 /**
201 * @see #getDnaSample()
202 */
203 private void setDnaSample(DnaSample dnaSample) {
204 this.dnaSample = dnaSample;
205 }
206
207 /**
208 * The resulting consensus sequence represened by this {@link Sequence sequence} .
209 * The consensus is usually computed from the {@link SingleRead single reads}.
210 * The result of which is stored in a file called {@link #getContigFile() contig file}
211 *
212 * #see {@link #getContigFile()}
213 * #see {@link #getSingleReads()}
214 */
215 public SequenceString getConsensusSequence() {
216 return consensusSequence;
217 }
218
219
220 /**
221 * @see #getConsensusSequence()
222 */
223 public void setConsensusSequence(SequenceString sequenceString) {
224 if (sequenceString == null){
225 sequenceString = SequenceString.NewInstance();
226 }
227 this.consensusSequence = sequenceString;
228 }
229
230 /**
231 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
232 * a barcoding sequence. If the barcoding sequence is only a part of the consensus sequence
233 * this part shall be stored as {@link #getBarcodeSequencePart() barcoding sequence part}.
234 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
235 * whether the sequence is a barcoding sequence or not.
236 *
237 * @see #getBarcodeSequencePart()
238 * @see #getSequenceString()
239 * @returns the isBarcode flag value (tri-state)
240 *
241 */
242 public Boolean getIsBarcode() {
243 return isBarcode;
244 }
245
246 /**
247 * @see #getIsBarcode()
248 * @see #getBarcodeSequencePart()
249 */
250 public void setIsBarcode(Boolean isBarcode) {
251 this.isBarcode = isBarcode;
252 }
253
254 /**
255 * If the barcode sequence string does not include 100% of the (consensus) sequence
256 * the part used as barcode is provided here. However, the barcode part
257 * should be kept empty if consensus sequence string and barcode sequence string are equal.
258 *
259 * @see #getIsBarcode()
260 */
261 public SequenceString getBarcodeSequencePart() {
262 return barcodeSequencePart;
263 }
264
265 /**
266 * @see #getBarcodeSequencePart()
267 */
268 public void setBarcodeSequencePart(SequenceString barcodeSequencePart) {
269 if (barcodeSequencePart == null){
270 barcodeSequencePart = SequenceString.NewInstance();
271 }
272 this.barcodeSequencePart = barcodeSequencePart;
273 }
274
275 /**
276 * Sets the {@link TermType#DnaMarker DNA marker} examined and described by this sequencing.
277 * The marker should usually be similar to the one used in the according {@link Amplification
278 * amplification process}. However, it may slightly differ, or, if multiple amplifications where
279 * used to build this consensus sequence it may be the super set of the markers used in amplification.
280 *
281 * @return
282 */
283 public DefinedTerm getDnaMarker(){
284 return this.dnaMarker;
285 }
286
287 /**
288 * @see #getDnaMarker()
289 * @param marker
290 */
291 public void setDnaMarker(DefinedTerm dnaMarker){
292 this.dnaMarker = dnaMarker;
293 }
294
295 /**
296 * The accession number used in GenBank, EMBL and DDBJ.
297 * @return
298 */
299 public String getGeneticAccessionNumber() {
300 return geneticAccessionNumber;
301 }
302
303 /**
304 * Sets the genetic accession number.
305 * @see #getGeneticAccessionNumber()
306 */
307 public void setGeneticAccessionNumber(String geneticAccessionNumber) {
308 this.geneticAccessionNumber = geneticAccessionNumber;
309 }
310
311
312 /**
313 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
314 */
315 public String getBoldProcessId() {
316 return boldProcessId;
317 }
318
319 public void setBoldProcessId(String boldProcessId) {
320 this.boldProcessId = boldProcessId;
321 }
322
323 /**
324 * Returns the name of the haplotype.
325 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
326 * A certain haplotype may be specific for an individual, a population or a species.
327 * @return
328 */
329 public String getHaplotype() {
330 return haplotype;
331 }
332
333 /**
334 * @see #getHaplotype()
335 */
336 public void setHaplotype(String haplotype) {
337 this.haplotype = haplotype;
338 }
339
340 /**
341 * The contigFile containing all data and data processing for this sequencing.
342 *
343 * @see #getConsensusSequence()
344 * @see #getSingleReads()
345 */
346 public Media getContigFile() {
347 return contigFile;
348 }
349
350 /**
351 * @see #getContigFile()
352 */
353 public void setContigFile(Media contigFile) {
354 this.contigFile = contigFile;
355 }
356
357
358 /**
359 * Citations are the set of references in which this sequence was published.
360 * Unlike taxonomic names the first publication of a sequence
361 * is not so important (maybe because it is required by publishers
362 * that they are all registered at Genbank) therefore we do not have something like an
363 * "original reference" attribute.<BR>
364 * Links to these references are to be stored within the reference itself.
365 * @return the set of references in which this sequence was published.
366 */
367 public Set<Reference> getCitations() {
368 return citations;
369 }
370 /**
371 * @see #getCitations()
372 */
373 protected void setCitations(Set<Reference> citations) {
374 this.citations = citations;
375 }
376 /**
377 * @see #getCitations()
378 */
379 public void addCitation(Reference citation) {
380 this.citations.add(citation);
381 }
382 /**
383 * @see #getCitations()
384 */
385 public void removeCitation(Reference citation) {
386 this.citations.remove(citation);
387 }
388
389 /**
390 * The {@link SingleRead single reads} that were used to build this consensus sequence.
391 *
392 * @see #getConsensusSequence()
393 * @see #getContigFile()
394 */
395 public Set<SingleRead> getSingleReads() {
396 return singleReads;
397 }
398 /**
399 * @see #getSingleReads()
400 */
401 public void addSingleRead(SingleRead singleRead) {
402 this.singleReads.add(singleRead);
403 }
404 /**
405 * @see #getSingleReads()
406 */
407 public void removeSingleRead(SingleRead singleRead) {
408 this.singleReads.remove(singleRead);
409 }
410 /**
411 * @see #getSingleReads()
412 */
413 //TODO private as long it is unclear how bidirectionality is handled
414 private void setSingleReads(Set<SingleRead> singleReads) {
415 this.singleReads = singleReads;
416 }
417
418
419 //*************************** Transient GETTER /SETTER *****************************/
420
421 /**
422 * Delegate method to get the text representation of the consensus sequence
423 * @see #setSequenceString(String)
424 */
425 @Transient
426 public String getSequenceString() {
427 return consensusSequence.getString();
428 }
429
430 /**
431 * Delegate method to set the text representation of the {@link #getConsensusSequence()
432 * consensus sequence}.
433 */
434 @Transient
435 public void setSequenceString(String sequence) {
436 consensusSequence.setString(sequence);
437 }
438
439 /**
440 * Convenience method which computes the set of all related pherograms
441 * @return the set of pherograms.
442 */
443 @Transient
444 public Set<Media> getPherograms(){
445 Set<Media> result = new HashSet<Media>();
446 for (SingleRead singleSeq : singleReads){
447 if (singleSeq.getPherogram() != null){
448 result.add(singleSeq.getPherogram());
449 }
450 }
451 return result;
452 }
453
454
455 //***** Registrations ************/
456 /**
457 * Returns the computed genBank uri.
458 * @return
459 */
460 @Transient
461 public URI getGenBankUri() {
462 return createExternalUri(GENBANK_BASE_URI);
463 }
464
465 /**
466 * Returns the computed EMBL uri.
467 * @return
468 */
469 @Transient
470 public URI getEmblUri() {
471 return createExternalUri(EMBL_BASE_URI);
472 }
473
474 /**
475 * Returns the computed DDBJ uri.
476 * @return
477 */
478 @Transient
479 public URI getDdbjUri() {
480 return createExternalUri(DDBJ_BASE_URI);
481 }
482
483 /**
484 * Returns the URI for the BOLD entry.
485 * @see #getBoldProcessId()
486 */
487 @Transient
488 public URI getBoldUri() {
489 return createExternalUri(BOLD_BASE_URI);
490 }
491
492
493 private URI createExternalUri(String baseUri){
494 if (StringUtils.isNotBlank(geneticAccessionNumber)){
495 return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
496 }else{
497 return null;
498 }
499 }
500
501
502 //*********************** CLONE ********************************************************/
503 /**
504 * Clones <i>this</i> sequence. This is a shortcut that enables to create
505 * a new instance that differs only slightly from <i>this</i> sequencing by
506 * modifying only some of the attributes.<BR><BR>
507 *
508 *
509 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
510 * @see java.lang.Object#clone()
511 */
512 @Override
513 public Object clone() {
514 try{
515 Sequence result = (Sequence)super.clone();
516
517 //sequences
518 result.consensusSequence = (SequenceString)this.consensusSequence.clone();
519 result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
520
521
522 //single sequences
523 result.singleReads = new HashSet<SingleRead>();
524 for (SingleRead seq: this.singleReads){
525 result.singleReads.add((SingleRead) seq);
526 }
527
528 //citations //TODO do we really want to copy these ??
529 result.citations = new HashSet<Reference>();
530 for (Reference ref: this.citations){
531 result.citations.add((Reference) ref);
532 }
533
534
535
536 return result;
537 }catch (CloneNotSupportedException e) {
538 logger.warn("Object does not implement cloneable");
539 e.printStackTrace();
540 return null;
541 }
542 }
543
544
545 }