3 * Copyright (C) 2016 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.taxeditor
.molecular
.io
;
13 import java
.io
.IOException
;
15 import java
.util
.ArrayList
;
16 import java
.util
.Collections
;
17 import java
.util
.Iterator
;
18 import java
.util
.List
;
20 import javax
.xml
.namespace
.QName
;
22 import eu
.etaxonomy
.cdm
.model
.molecular
.Sequence
;
23 import eu
.etaxonomy
.cdm
.model
.molecular
.SingleReadAlignment
;
24 import eu
.etaxonomy
.taxeditor
.molecular
.editor
.AlignmentEditor
;
25 import eu
.etaxonomy
.taxeditor
.molecular
.l10n
.Messages
;
26 import info
.bioinfweb
.commons
.bio
.CharacterStateSetType
;
27 import info
.bioinfweb
.commons
.io
.W3CXSConstants
;
28 import info
.bioinfweb
.commons
.text
.StringUtils
;
29 import info
.bioinfweb
.jphyloio
.ReadWriteConstants
;
30 import info
.bioinfweb
.jphyloio
.ReadWriteParameterMap
;
31 import info
.bioinfweb
.jphyloio
.dataadapters
.JPhyloIOEventReceiver
;
32 import info
.bioinfweb
.jphyloio
.dataadapters
.MatrixDataAdapter
;
33 import info
.bioinfweb
.jphyloio
.dataadapters
.ObjectListDataAdapter
;
34 import info
.bioinfweb
.jphyloio
.dataadapters
.implementations
.NoCharDefsNoSetsMatrixDataAdapter
;
35 import info
.bioinfweb
.jphyloio
.dataadapters
.implementations
.store
.StoreObjectListDataAdapter
;
36 import info
.bioinfweb
.jphyloio
.events
.CharacterSetIntervalEvent
;
37 import info
.bioinfweb
.jphyloio
.events
.LinkedLabeledIDEvent
;
38 import info
.bioinfweb
.jphyloio
.events
.SequenceTokensEvent
;
39 import info
.bioinfweb
.jphyloio
.events
.TokenSetDefinitionEvent
;
40 import info
.bioinfweb
.jphyloio
.events
.type
.EventContentType
;
41 import info
.bioinfweb
.jphyloio
.utils
.JPhyloIOWritingUtils
;
46 * An implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
48 * Note that this adapter stores information on the {@link Sequence} at creation time. Modifying the sequence or linked
49 * objects requires creating a new instance of this class. Otherwise, unexpected behavior may result.
54 public class CDMSequenceMatrixAdapter
extends NoCharDefsNoSetsMatrixDataAdapter
implements ReadWriteConstants
, SingleReadAlignmentRDFXMLConstants
{
56 private static final String NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
= Messages
.CDMSequenceMatrixAdapter_NO_SEQUENCE_FOUND
;
57 public static final String MATRIX_ID
= DEFAULT_MATRIX_ID_PREFIX
+ "ContigAlignment"; //$NON-NLS-1$
58 public static final String SINGLE_READ_SEQUENCE_ID_PREFIX
= DEFAULT_SEQUENCE_ID_PREFIX
+ "SingleRead"; //$NON-NLS-1$
59 public static final String CONSENSUS_SEQUENCE_ID
= DEFAULT_SEQUENCE_ID_PREFIX
+ "Consensus"; //$NON-NLS-1$
62 private Sequence sequence
;
63 private boolean exportConsensus
;
64 private List
<SingleReadAlignment
> singleReadList
;
65 private String consensusSequenceLabel
;
66 private ObjectListDataAdapter
<TokenSetDefinitionEvent
> tokenSetList
;
67 //TODO Also allow specifying single read labels?
/**
 * Creates a new instance of this class.
 *
 * @param sequence the <i>CDM</i> sequence object containing the data to be exported
 * @param consensusSequenceLabel the label to be used for the consensus sequence (May be {@code null}.)
 * @param exportConsensus Specify {@code true} here, if the consensus sequence shall be included in the export
 *        or {@code false} otherwise.
 * @param exportSingleReads Specify {@code true} here, if the single reads shall be included in the export
 *        or {@code false} otherwise.
 * @throws IllegalArgumentException if both {@code exportConsensus} and {@code exportSingleReads} are {@code false}
 */
public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel, boolean exportConsensus,
        boolean exportSingleReads) {

    super();

    // At least one kind of sequence must be exported.
    if (!(exportConsensus || exportSingleReads)) {
        throw new IllegalArgumentException(Messages.CDMSequenceMatrixAdapter_EITHER_MUST_BE_TRUE);
    }

    this.sequence = sequence;
    this.exportConsensus = exportConsensus;
    this.consensusSequenceLabel = consensusSequenceLabel;
    this.tokenSetList = createTokenSetList();

    if (!exportSingleReads) {
        this.singleReadList = Collections.emptyList();
    }
    else {
        // Store references of single reads in defined order to allow random access.
        this.singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments());
        //TODO Omit single reads that do not have an edited sequence yet?
    }
}
102 private ObjectListDataAdapter
<TokenSetDefinitionEvent
> createTokenSetList() {
103 StoreObjectListDataAdapter
<TokenSetDefinitionEvent
> result
= new StoreObjectListDataAdapter
<TokenSetDefinitionEvent
>();
104 final String id
= ReadWriteConstants
.DEFAULT_TOKEN_SET_ID_PREFIX
;
105 result
.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType
.DNA
, id
, null));
106 long length
= getColumnCount(null); //TODO Change this expression, if column count should return -1 in the future.
107 if (length
> 0) { // Empty character set interval events are not allowed. //TODO Handle -1 separately
108 result
.getObjectContent(id
).add(new CharacterSetIntervalEvent(0, length
));
115 * @return the sequence
117 public Sequence
getCDMSequence() {
122 private int extractSingleReadIndexFromID(String sequenceID
) {
123 if (sequenceID
.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX
)) {
125 return Integer
.parseInt(sequenceID
.substring(SINGLE_READ_SEQUENCE_ID_PREFIX
.length()));
127 catch (NumberFormatException e
) {} // fall through
134 public LinkedLabeledIDEvent
getStartEvent(ReadWriteParameterMap parameters
) {
135 return new LinkedLabeledIDEvent(EventContentType
.ALIGNMENT
, MATRIX_ID
, "Contig alignment", null); //$NON-NLS-1$
136 //TODO Use label according to derivate and markers.
// Presumably tells the caller whether the matrix contains tokens longer than one character.
// NOTE(review): only a DNA token set is declared in createTokenSetList(), so single character tokens
// are presumably expected here - confirm against the returned value.
141 public boolean containsLongTokens(ReadWriteParameterMap parameters
) {
147 public long getColumnCount(ReadWriteParameterMap parameters
) {
148 return -1; // Indicates that sequences may have different lengths. (Otherwise writing files without sequence elongation would not be possible.)
153 public long getSequenceCount(ReadWriteParameterMap parameters
) {
155 if (!exportConsensus
) {
158 return singleReadList
.size() + addend
;
163 public Iterator
<String
> getSequenceIDIterator(ReadWriteParameterMap parameters
) {
164 return new SequenceIDIterator(singleReadList
.size(), exportConsensus
);
169 public long getSequenceLength(ReadWriteParameterMap parameters
, String sequenceID
) throws IllegalArgumentException
{
170 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
171 if (singleReadIndex
>= 0) {
172 String sequence
= singleReadList
.get(singleReadIndex
).getEditedSequence();
173 if (sequence
!= null) {
174 return sequence
.length();
176 else { // This would happen e.g. if not edited sequence was copied from pherogram yet.
180 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
181 return getCDMSequence().getSequenceString().length();
184 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
190 public LinkedLabeledIDEvent
getSequenceStartEvent(ReadWriteParameterMap parameters
, String sequenceID
) {
191 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
192 if (singleReadIndex
>= 0) {
193 return new LinkedLabeledIDEvent(EventContentType
.SEQUENCE
, sequenceID
, "Single read " + singleReadIndex
, null); //$NON-NLS-1$
194 //TODO Use name displayed in derivate hierarchy or specified name as label instead?
196 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
197 return new LinkedLabeledIDEvent(EventContentType
.SEQUENCE
, sequenceID
, consensusSequenceLabel
, null);
200 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
205 private void writeStringPart(JPhyloIOEventReceiver receiver
, String string
, long startColumn
, long endColumn
) throws IOException
{
206 if (string
!= null) {
207 receiver
.add(new SequenceTokensEvent(StringUtils
.charSequenceToStringList(
208 string
.substring((int)startColumn
, (int)endColumn
))));
213 private String
createMetadataID(String sequenceID
, QName predicate
) {
214 return sequenceID
+ "META" + predicate
.getLocalPart(); //$NON-NLS-1$
218 private void writeMetadataEvents(JPhyloIOEventReceiver receiver
, String sequenceID
, QName predicate
, QName dataType
, Object objectValue
)
221 if (objectValue
!= null) {
222 JPhyloIOWritingUtils
.writeSimpleLiteralMetadata(receiver
, createMetadataID(sequenceID
, predicate
), null,
223 predicate
, dataType
, objectValue
, null);
229 public void writeSequencePartContentData(ReadWriteParameterMap parameters
, JPhyloIOEventReceiver receiver
, String sequenceID
,
230 long startColumn
, long endColumn
) throws IOException
, IllegalArgumentException
{
232 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
233 if (singleReadIndex
>= 0) {
234 SingleReadAlignment singleRead
= singleReadList
.get(singleReadIndex
);
235 if (startColumn
== 0) {
236 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_SINGLE_READ
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(true));
237 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_REVERSE_COMPLEMENTED
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(singleRead
.isReverseComplement()));
238 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_LEFT_CUT_POSITION
, W3CXSConstants
.DATA_TYPE_INT
, singleRead
.getLeftCutPosition());
239 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_RIGHT_CUT_POSITION
, W3CXSConstants
.DATA_TYPE_INT
, singleRead
.getRightCutPosition());
241 URI pherogramURI
= AlignmentEditor
.getPherogramURI(singleRead
.getSingleRead());
242 if (pherogramURI
!= null) {
243 JPhyloIOWritingUtils
.writeTerminalResourceMetadata(receiver
, createMetadataID(sequenceID
, PREDICATE_HAS_PHEROGRAM
),
244 null, PREDICATE_HAS_PHEROGRAM
, pherogramURI
);
247 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_PHEROGRAM_ALIGNMENT
, DATA_TYPE_PHERORAGM_ALIGNMENT
, singleRead
.getShifts());
248 //TODO This needs to be written with sequence type XML!
250 writeStringPart(receiver
, singleRead
.getEditedSequence(), startColumn
, endColumn
);
252 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
253 if (startColumn
== 0) {
254 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_CONSENSUS_SEQUENCE
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(true));
255 //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
257 writeStringPart(receiver
, getCDMSequence().getSequenceString(), startColumn
, endColumn
);
260 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
266 * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
269 public ObjectListDataAdapter
<TokenSetDefinitionEvent
> getTokenSets(ReadWriteParameterMap parameters
) {