merge trunk to cdm-3.3 branch
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / model / molecular / Sequence.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.model.molecular;
10
11
12 import java.net.URI;
13 import java.util.HashSet;
14 import java.util.Set;
15
16 import javax.persistence.Entity;
17 import javax.persistence.FetchType;
18 import javax.persistence.ManyToMany;
19 import javax.persistence.ManyToOne;
20 import javax.persistence.Transient;
21 import javax.validation.constraints.Size;
22 import javax.xml.bind.annotation.XmlAccessType;
23 import javax.xml.bind.annotation.XmlAccessorType;
24 import javax.xml.bind.annotation.XmlAttribute;
25 import javax.xml.bind.annotation.XmlElement;
26 import javax.xml.bind.annotation.XmlElementWrapper;
27 import javax.xml.bind.annotation.XmlIDREF;
28 import javax.xml.bind.annotation.XmlRootElement;
29 import javax.xml.bind.annotation.XmlSchemaType;
30 import javax.xml.bind.annotation.XmlType;
31
32 import org.apache.log4j.Logger;
33 import org.codehaus.plexus.util.StringUtils;
34 import org.hibernate.annotations.Cascade;
35 import org.hibernate.annotations.CascadeType;
36 import org.hibernate.envers.Audited;
37 import org.hibernate.search.annotations.IndexedEmbedded;
38 import org.springframework.beans.factory.annotation.Configurable;
39
40 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
41 import eu.etaxonomy.cdm.model.common.DefinedTerm;
42 import eu.etaxonomy.cdm.model.common.TermType;
43 import eu.etaxonomy.cdm.model.media.Media;
44 import eu.etaxonomy.cdm.model.reference.Reference;
45
46 /**
47 * Alignment of multiple single sequences to a consensus sequence,
48 * may also include the extracted barcode sequence.
49 *
50 * This class holds information about both the combining process of
51 * {@link SingleSequence single sequences} to one consensus sequence
52 * (singleSequences, contigFile) as well as sequence related information.
53 * The later includes the sequence string itself, important genetic information
54 * (marker, haplotype) as well as registration information (genetic accession number)
55 * citations and barcoding information.
56 *
57 * @author m.doering
58 * @created 08-Nov-2007 13:06:51
59 * @author a.mueller
60 * @updated 11-Jul-2013
61 */
62 @XmlAccessorType(XmlAccessType.FIELD)
63 @XmlType(name = "Sequence", propOrder = {
64 "dnaSample",
65 "consensusSequence",
66 "isBarcode",
67 "barcodeSequencePart",
68 "marker",
69 "geneticAccessionNumber",
70 "boldProcessId",
71 "haplotype",
72 "contigFile",
73 "singleSequences",
74 "citations"
75 })
76 @XmlRootElement(name = "Sequencing")
77 @Entity
78 @Audited
79 @Configurable
80 //@Table(appliesTo="Sequence", indexes = { @Index(name = "sequenceTitleCacheIndex", columnNames = { "titleCache" }) })
81 public class Sequence extends AnnotatableEntity implements Cloneable{
82 private static final long serialVersionUID = 8298983152731241775L;
83 private static final Logger logger = Logger.getLogger(Sequence.class);
84
85 private static final String GENBANK_BASE_URI = "http://www.ncbi.nlm.nih.gov/nuccore/%s";
86 private static final String EMBL_BASE_URI = "http://www.ebi.ac.uk/ena/data/view/%s";
87 private static final String DDBJ_BASE_URI = "http://getentry.ddbj.nig.ac.jp/getentry/na/%s/?filetype=html";
88 private static final String BOLD_BASE_URI = "http://www.boldsystems.org/index.php/Public_RecordView?processid=%s";
89
90 @XmlElement( name = "DnaSample")
91 @XmlIDREF
92 @XmlSchemaType(name = "IDREF")
93 @ManyToOne(fetch = FetchType.LAZY)
94 @IndexedEmbedded
95 private DnaSample dnaSample;
96
97
98 /** @see #getContigFile() */
99 @XmlElement(name = "ContigFile")
100 @XmlIDREF
101 @XmlSchemaType(name = "IDREF")
102 @ManyToOne(fetch = FetchType.LAZY)
103 private Media contigFile;
104
105 /** @see #getConsensusSequence() */
106 @XmlElement(name = "ConsensusSequence")
107 private SequenceString consensusSequence = SequenceString.NewInstance();
108
109 // /**{@link #getSequence()}*/
110 // @XmlElement(name = "Sequence")
111 // @Lob
112 // private String sequence;
113 //
114 // @XmlElement(name = "Length")
115 // private Integer length;
116
117
118 @XmlAttribute(name = "isBarcode")
119 private Boolean isBarcode = null;
120
121 /** @see #getBarcodeSequence()*/
122 @XmlElement(name = "BarcodeSequencePart")
123 private SequenceString barcodeSequencePart = SequenceString.NewInstance();
124
125 /** @see #getGeneticAccessionNumber()*/
126 @XmlElement(name = "GeneticAccessionNumber")
127 @Size(max=20)
128 private String geneticAccessionNumber;
129
130 /** @see #getBoldProcessId() */
131 @XmlElement(name = "BoldProcessId")
132 @Size(max=20)
133 private String boldProcessId;
134
135 @XmlElementWrapper(name = "SingleSequences")
136 @XmlElement(name = "SingleSequence")
137 @XmlIDREF
138 @XmlSchemaType(name = "IDREF")
139 @ManyToMany(fetch = FetchType.LAZY)
140 @Cascade({CascadeType.SAVE_UPDATE})
141 private Set<SingleSequence> singleSequences = new HashSet<SingleSequence>();
142
143 /** @see #getMarker() */
144 @XmlElement(name = "Marker")
145 @XmlIDREF
146 @XmlSchemaType(name = "IDREF")
147 @ManyToOne(fetch = FetchType.LAZY)
148 //no cascade as it is a defined term
149 private DefinedTerm marker;
150
151
152 /** @see #getHaplotype() */
153 @XmlElement(name = "Haplotype")
154 @Size(max=100)
155 private String haplotype;
156
157 /** @see #getCitations() */
158 @XmlElementWrapper(name = "Citations")
159 @XmlElement(name = "Citation")
160 @XmlIDREF
161 @XmlSchemaType(name = "IDREF")
162 @ManyToMany(fetch = FetchType.LAZY)
163 @Cascade({CascadeType.SAVE_UPDATE})
164 private Set<Reference> citations = new HashSet<Reference>();
165
166 // //should be calculated in case sequence is set
167 // @XmlElement (name = "DateSequenced", type= String.class)
168 // @XmlJavaTypeAdapter(DateTimeAdapter.class)
169 // @Type(type="dateTimeUserType")
170 // @Basic(fetch = FetchType.LAZY)
171 // private DateTime dateSequenced;
172
173
174 //*********************** FACTORY ****************************************************/
175
176 public static Sequence NewInstance(String consensusSequence){
177 Sequence result = new Sequence();
178 result.setSequenceString(consensusSequence);
179 return result;
180 }
181
182 //*********************** CONSTRUCTOR ****************************************************/
183
184 protected Sequence() {}
185
186 //*********************** GETTER / SETTER ****************************************************/
187
188
189 /**
190 * The {@link DnaSample dna sample} this sequencing belongs too.
191 */
192 public DnaSample getDnaSample() {
193 return dnaSample;
194 }
195
196 //TODO bidirectionality??
197 /**
198 * @see #getDnaSample()
199 */
200 private void setDnaSample(DnaSample dnaSample) {
201 this.dnaSample = dnaSample;
202 }
203
204 /**
205 * The consensus sequence achieved by this sequencing.
206 */
207 public SequenceString getConsensusSequence() {
208 return consensusSequence;
209 }
210
211
212 /**
213 * @see #getConsensusSequence()
214 */
215 public void setConsensusSequence(SequenceString sequenceString) {
216 if (sequenceString == null){
217 sequenceString = SequenceString.NewInstance();
218 }
219 this.consensusSequence = sequenceString;
220 }
221
222 /**
223 * The isBarcode flag should be set to true if this (consensus) sequence is or includes
224 * a barcode sequence. If the barcode sequence is only a part of the consensus sequence
225 * this part is to be stored as {@link #getBarcodeSequencePart() barcode sequence part}.
226 * A isBarcode value of <code>null</code> indicates that we do have no knowledge
227 * wether the sequence is a barcoding sequence or not.
228 *
229 * @see #getBarcodeSequencePart()
230 * @see #getSequenceString()
231 * @returns the isBarcode flag value (tri-state)
232 *
233 */
234 public Boolean getIsBarcode() {
235 return isBarcode;
236 }
237
238 /**
239 * @see #getIsBarcode()
240 * @see #getBarcodeSequencePart()
241 */
242 public void setIsBarcode(Boolean isBarcode) {
243 this.isBarcode = isBarcode;
244 }
245
246 /**
247 * If the barcode sequence string does not include 100% of the (consensus) sequence
248 * the part used as barcode is provided here. However, the barcode part
249 * should be kept if consensus sequence string and barcode sequence string are equal.
250 *
251 * @see #getIsBarcode()
252 */
253 public SequenceString getBarcodeSequencePart() {
254 return barcodeSequencePart;
255 }
256
257 /**
258 * @see #getBarcodeSequence()
259 */
260 public void setBarcodeSequence(SequenceString barcodeSequencePart) {
261 if (barcodeSequencePart == null){
262 barcodeSequencePart = SequenceString.NewInstance();
263 }
264 this.barcodeSequencePart = barcodeSequencePart;
265 }
266
267 /**
268 * Sets the {@link TermType#DnaMarker marker} examined and described by this sequencing.
269 * @return
270 */
271 public DefinedTerm getMarker(){
272 return this.marker;
273 }
274
275 /**
276 * @see #getMarker()
277 * @param marker
278 */
279 public void setMarker(DefinedTerm marker){
280 this.marker = marker;
281 }
282
283 /**
284 * The accession number used in GenBank, EMBL and DDBJ.
285 * @return
286 */
287 public String getGeneticAccessionNumber() {
288 return geneticAccessionNumber;
289 }
290
291 /**
292 * Sets the genetic accession number.
293 * @see #getGeneticAccessionNumber()
294 */
295 public void setGeneticAccessionNumber(String geneticAccessionNumber) {
296 this.geneticAccessionNumber = geneticAccessionNumber;
297 }
298
299
300 /**
301 * The identifier used by the Barcode of Life Data Systems (BOLD, http://www.boldsystems.org/).
302 */
303 public String getBoldProcessId() {
304 return boldProcessId;
305 }
306
307 public void setBoldProcessId(String boldProcessId) {
308 this.boldProcessId = boldProcessId;
309 }
310
311 /**
312 * Returns the name of the haplotype.
313 * A haplotype (haploide genotype) is a variant of nucleotide sequences on the same chromosome.
314 * A certain haplotype may be specific for an individual, a population or a species.
315 * @return
316 */
317 public String getHaplotype() {
318 return haplotype;
319 }
320
321 /**
322 * @see #getHaplotype()
323 */
324 public void setHaplotype(String haplotype) {
325 this.haplotype = haplotype;
326 }
327
328 /**
329 * The contigFile containing all data and data processing for this sequencing.
330 */
331 public Media getContigFile() {
332 return contigFile;
333 }
334
335 /**
336 * @see #getContigFile()
337 */
338 public void setContigFile(Media contigFile) {
339 this.contigFile = contigFile;
340 }
341
342
343 /**
344 * Citations are the set of references in which this sequence was published.
345 * Unlike taxonomic names the first publication of a sequence
346 * is not so important (maybe because it is required by publishers
347 * that they are all registered at Genbank) therefore we do not have something like an
348 * "original reference" attribute.<BR>
349 * Links to these references are to be stored within the reference itself.
350 * @return the set of references in which this sequence was published.
351 */
352 public Set<Reference> getCitations() {
353 return citations;
354 }
355 /**
356 * @see #getCitations()
357 */
358 protected void setCitations(Set<Reference> citations) {
359 this.citations = citations;
360 }
361 /**
362 * @see #getCitations()
363 */
364 public void addCitation(Reference citation) {
365 this.citations.add(citation);
366 }
367 /**
368 * @see #getCitations()
369 */
370 public void removeCitation(Reference citation) {
371 this.citations.remove(citation);
372 }
373
374 /**
375 * The single sequences that where used to create this consensus sequence.
376 */
377 public Set<SingleSequence> getSingleSequences() {
378 return singleSequences;
379 }
380 /**
381 * @see #getSingleSequences()
382 */
383 public void addSingleSquence(SingleSequence singleSequence) {
384 this.singleSequences.add(singleSequence);
385 }
386 /**
387 * @see #getSingleSequences()
388 */
389 public void removeSingleSquence(SingleSequence singleSequence) {
390 this.singleSequences.remove(singleSequence);
391 }
392 /**
393 * @see #getSingleSequences()
394 */
395 //TODO private as long it is unclear how bidirectionality is handled
396 private void setSingleSequences(Set<SingleSequence> singleSequences) {
397 this.singleSequences = singleSequences;
398 }
399
400
401 //*************************** Transient GETTER /SETTER *****************************/
402
403 /**
404 * Delegate method to get the text representation of the consensus sequence
405 * @see #setSequenceString(String)
406 */
407 @Transient
408 public String getSequenceString() {
409 return consensusSequence.getString();
410 }
411
412 /**
413 * Delegate method to set the text representation of the {@link #getConsensusSequence()
414 * consensus sequence}.
415 */
416 @Transient
417 public void setSequenceString(String sequence) {
418 consensusSequence.setString(sequence);
419 }
420
421 /**
422 * Convenience method which computes the set of all related pherograms
423 * @return the set of pherograms.
424 */
425 @Transient
426 public Set<Media> getPherograms(){
427 Set<Media> result = new HashSet<Media>();
428 for (SingleSequence singleSeq : singleSequences){
429 if (singleSeq.getPherogram() != null){
430 result.add(singleSeq.getPherogram());
431 }
432 }
433 return result;
434 }
435
436
437 //***** Registrations ************/
438 /**
439 * Returns the computed genBank uri.
440 * @return
441 */
442 @Transient
443 public URI getGenBankUri() {
444 return createExternalUri(GENBANK_BASE_URI);
445 }
446
447 /**
448 * Returns the computed EMBL uri.
449 * @return
450 */
451 @Transient
452 public URI getEmblUri() {
453 return createExternalUri(EMBL_BASE_URI);
454 }
455
456 /**
457 * Returns the computed DDBJ uri.
458 * @return
459 */
460 @Transient
461 public URI getDdbjUri() {
462 return createExternalUri(DDBJ_BASE_URI);
463 }
464
465 /**
466 * Returns the URI for the BOLD entry.
467 * @see #getBoldProcessId()
468 */
469 @Transient
470 public URI getBoldUri() {
471 return createExternalUri(BOLD_BASE_URI);
472 }
473
474
475 private URI createExternalUri(String baseUri){
476 if (StringUtils.isNotBlank(geneticAccessionNumber)){
477 return URI.create(String.format(baseUri, geneticAccessionNumber.trim()));
478 }else{
479 return null;
480 }
481 }
482
483
484 //*********************** CLONE ********************************************************/
485 /**
486 * Clones <i>this</i> sequence. This is a shortcut that enables to create
487 * a new instance that differs only slightly from <i>this</i> sequencing by
488 * modifying only some of the attributes.<BR><BR>
489 *
490 *
491 * @see eu.etaxonomy.cdm.model.media.IdentifiableEntity#clone()
492 * @see java.lang.Object#clone()
493 */
494 @Override
495 public Object clone() {
496 try{
497 Sequence result = (Sequence)super.clone();
498
499 //sequences
500 result.consensusSequence = (SequenceString)this.consensusSequence.clone();
501 result.barcodeSequencePart = (SequenceString)this.barcodeSequencePart.clone();
502
503
504 //single sequences
505 result.singleSequences = new HashSet<SingleSequence>();
506 for (SingleSequence seq: this.singleSequences){
507 result.singleSequences.add((SingleSequence) seq);
508 }
509
510 //citations //TODO do we really want to copy these ??
511 result.citations = new HashSet<Reference>();
512 for (Reference ref: this.citations){
513 result.citations.add((Reference) ref);
514 }
515
516
517
518 return result;
519 }catch (CloneNotSupportedException e) {
520 logger.warn("Object does not implement cloneable");
521 e.printStackTrace();
522 return null;
523 }
524 }
525
526
527 }