1 package eu
.etaxonomy
.taxeditor
.molecular
.io
;
3 import java
.io
.IOException
;
4 import java
.util
.ArrayList
;
5 import java
.util
.Collections
;
6 import java
.util
.Iterator
;
9 import javax
.xml
.namespace
.QName
;
11 import eu
.etaxonomy
.cdm
.common
.URI
;
12 import eu
.etaxonomy
.cdm
.model
.molecular
.Sequence
;
13 import eu
.etaxonomy
.cdm
.model
.molecular
.SingleReadAlignment
;
14 import eu
.etaxonomy
.taxeditor
.molecular
.editor
.e4
.AlignmentEditorE4
;
15 import eu
.etaxonomy
.taxeditor
.molecular
.l10n
.Messages
;
16 import info
.bioinfweb
.commons
.bio
.CharacterStateSetType
;
17 import info
.bioinfweb
.commons
.io
.W3CXSConstants
;
18 import info
.bioinfweb
.commons
.text
.StringUtils
;
19 import info
.bioinfweb
.jphyloio
.ReadWriteConstants
;
20 import info
.bioinfweb
.jphyloio
.ReadWriteParameterMap
;
21 import info
.bioinfweb
.jphyloio
.dataadapters
.JPhyloIOEventReceiver
;
22 import info
.bioinfweb
.jphyloio
.dataadapters
.MatrixDataAdapter
;
23 import info
.bioinfweb
.jphyloio
.dataadapters
.ObjectListDataAdapter
;
24 import info
.bioinfweb
.jphyloio
.dataadapters
.implementations
.NoCharDefsNoSetsMatrixDataAdapter
;
25 import info
.bioinfweb
.jphyloio
.dataadapters
.implementations
.store
.StoreObjectListDataAdapter
;
26 import info
.bioinfweb
.jphyloio
.events
.CharacterSetIntervalEvent
;
27 import info
.bioinfweb
.jphyloio
.events
.LinkedLabeledIDEvent
;
28 import info
.bioinfweb
.jphyloio
.events
.SequenceTokensEvent
;
29 import info
.bioinfweb
.jphyloio
.events
.TokenSetDefinitionEvent
;
30 import info
.bioinfweb
.jphyloio
.events
.type
.EventContentType
;
31 import info
.bioinfweb
.jphyloio
.utils
.JPhyloIOWritingUtils
;
34 * An implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
36 * Note that this adapter stores information on the {@link Sequence} at creation time. Modifying the sequence or linked
37 * objects afterwards requires creating a new instance of this class. Otherwise unexpected behavior may be the consequence.
42 public class CDMSequenceMatrixAdapter
extends NoCharDefsNoSetsMatrixDataAdapter
implements ReadWriteConstants
, SingleReadAlignmentRDFXMLConstants
{
44 private static final String NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
= Messages
.CDMSequenceMatrixAdapter_NO_SEQUENCE_FOUND
;
45 public static final String MATRIX_ID
= DEFAULT_MATRIX_ID_PREFIX
+ "ContigAlignment"; //$NON-NLS-1$
46 public static final String SINGLE_READ_SEQUENCE_ID_PREFIX
= DEFAULT_SEQUENCE_ID_PREFIX
+ "SingleRead"; //$NON-NLS-1$
47 public static final String CONSENSUS_SEQUENCE_ID
= DEFAULT_SEQUENCE_ID_PREFIX
+ "Consensus"; //$NON-NLS-1$
50 private Sequence sequence
;
51 private boolean exportConsensus
;
52 private List
<SingleReadAlignment
> singleReadList
;
53 private String consensusSequenceLabel
;
54 private ObjectListDataAdapter
<TokenSetDefinitionEvent
> tokenSetList
;
55 //TODO Also allow specifying single read labels?
/**
 * Creates a new instance of this class.
 *
 * @param sequence the <i>CDM</i> sequence object containing the data to be exported
 * @param consensusSequenceLabel the label to be used for the consensus sequence (Maybe {@code null}.)
 * @param exportConsensus Specify {@code true} here, if the consensus sequence shall be included in the export
 *        or {@code false} otherwise.
 * @param exportSingleReads Specify {@code true} here, if the single reads shall be included in the export
 *        or {@code false} otherwise.
 * @throws IllegalArgumentException if both {@code exportConsensus} and {@code exportSingleReads} are {@code false}
 */
public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel, boolean exportConsensus,
        boolean exportSingleReads) {

    // At least one kind of content must be exported.
    if (!(exportConsensus || exportSingleReads)) {
        throw new IllegalArgumentException(Messages.CDMSequenceMatrixAdapter_EITHER_MUST_BE_TRUE);
    }

    this.sequence = sequence;
    this.exportConsensus = exportConsensus;
    this.consensusSequenceLabel = consensusSequenceLabel;
    this.tokenSetList = createTokenSetList();

    if (exportSingleReads) {
        // Store references of single reads in defined order to allow random access.
        //TODO Omit single reads that do not have an edited sequence yet?
        this.singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments());
    }
    else {
        this.singleReadList = Collections.emptyList();
    }
}
90 private ObjectListDataAdapter
<TokenSetDefinitionEvent
> createTokenSetList() {
91 StoreObjectListDataAdapter
<TokenSetDefinitionEvent
> result
= new StoreObjectListDataAdapter
<TokenSetDefinitionEvent
>();
92 final String id
= ReadWriteConstants
.DEFAULT_TOKEN_SET_ID_PREFIX
;
93 result
.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType
.DNA
, id
, null));
94 long length
= getColumnCount(null); //TODO Change this expression, if column count should return -1 in the future.
95 if (length
> 0) { // Empty character set interval events are not allowed. //TODO Handle -1 separately
96 result
.getObjectContent(id
).add(new CharacterSetIntervalEvent(0, length
));
103 * @return the sequence
105 public Sequence
getCDMSequence() {
110 private int extractSingleReadIndexFromID(String sequenceID
) {
111 if (sequenceID
.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX
)) {
113 return Integer
.parseInt(sequenceID
.substring(SINGLE_READ_SEQUENCE_ID_PREFIX
.length()));
115 catch (NumberFormatException e
) {} // fall through
122 public LinkedLabeledIDEvent
getStartEvent(ReadWriteParameterMap parameters
) {
123 return new LinkedLabeledIDEvent(EventContentType
.ALIGNMENT
, MATRIX_ID
, "Contig alignment", null); //$NON-NLS-1$
124 //TODO Use label according to derivate and markers.
129 public boolean containsLongTokens(ReadWriteParameterMap parameters
) {
135 public long getColumnCount(ReadWriteParameterMap parameters
) {
136 return -1; // Indicates that sequences may have different lengths. (Otherwise writing files without sequence elongation would not be possible.)
141 public long getSequenceCount(ReadWriteParameterMap parameters
) {
143 if (!exportConsensus
) {
146 return singleReadList
.size() + addend
;
151 public Iterator
<String
> getSequenceIDIterator(ReadWriteParameterMap parameters
) {
152 return new SequenceIDIterator(singleReadList
.size(), exportConsensus
);
157 public long getSequenceLength(ReadWriteParameterMap parameters
, String sequenceID
) throws IllegalArgumentException
{
158 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
159 if (singleReadIndex
>= 0) {
160 String sequence
= singleReadList
.get(singleReadIndex
).getEditedSequence();
161 if (sequence
!= null) {
162 return sequence
.length();
164 else { // This would happen e.g. if not edited sequence was copied from pherogram yet.
168 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
169 return getCDMSequence().getSequenceString().length();
172 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
178 public LinkedLabeledIDEvent
getSequenceStartEvent(ReadWriteParameterMap parameters
, String sequenceID
) {
179 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
180 if (singleReadIndex
>= 0) {
181 return new LinkedLabeledIDEvent(EventContentType
.SEQUENCE
, sequenceID
, "Single read " + singleReadIndex
, null); //$NON-NLS-1$
182 //TODO Use name displayed in derivate hierarchy or specified name as label instead?
184 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
185 return new LinkedLabeledIDEvent(EventContentType
.SEQUENCE
, sequenceID
, consensusSequenceLabel
, null);
188 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
193 private void writeStringPart(JPhyloIOEventReceiver receiver
, String string
, long startColumn
, long endColumn
) throws IOException
{
194 if (string
!= null) {
195 receiver
.add(new SequenceTokensEvent(StringUtils
.charSequenceToStringList(
196 string
.substring((int)startColumn
, (int)endColumn
))));
201 private String
createMetadataID(String sequenceID
, QName predicate
) {
202 return sequenceID
+ "META" + predicate
.getLocalPart(); //$NON-NLS-1$
206 private void writeMetadataEvents(JPhyloIOEventReceiver receiver
, String sequenceID
, QName predicate
, QName dataType
, Object objectValue
)
209 if (objectValue
!= null) {
210 JPhyloIOWritingUtils
.writeSimpleLiteralMetadata(receiver
, createMetadataID(sequenceID
, predicate
), null,
211 predicate
, dataType
, objectValue
, null);
217 public void writeSequencePartContentData(ReadWriteParameterMap parameters
, JPhyloIOEventReceiver receiver
, String sequenceID
,
218 long startColumn
, long endColumn
) throws IOException
, IllegalArgumentException
{
220 int singleReadIndex
= extractSingleReadIndexFromID(sequenceID
);
221 if (singleReadIndex
>= 0) {
222 SingleReadAlignment singleRead
= singleReadList
.get(singleReadIndex
);
223 if (startColumn
== 0) {
224 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_SINGLE_READ
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(true));
225 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_REVERSE_COMPLEMENTED
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(singleRead
.isReverseComplement()));
226 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_LEFT_CUT_POSITION
, W3CXSConstants
.DATA_TYPE_INT
, singleRead
.getLeftCutPosition());
227 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_RIGHT_CUT_POSITION
, W3CXSConstants
.DATA_TYPE_INT
, singleRead
.getRightCutPosition());
229 URI pherogramURI
= AlignmentEditorE4
.getPherogramURI(singleRead
.getSingleRead());
230 if (pherogramURI
!= null) {
231 JPhyloIOWritingUtils
.writeTerminalResourceMetadata(receiver
, createMetadataID(sequenceID
, PREDICATE_HAS_PHEROGRAM
),
232 null, PREDICATE_HAS_PHEROGRAM
, pherogramURI
.getJavaUri());
235 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_HAS_PHEROGRAM_ALIGNMENT
, DATA_TYPE_PHERORAGM_ALIGNMENT
, singleRead
.getShifts());
236 //TODO This needs to be written with sequence type XML!
238 writeStringPart(receiver
, singleRead
.getEditedSequence(), startColumn
, endColumn
);
240 else if (exportConsensus
&& CONSENSUS_SEQUENCE_ID
.equals(sequenceID
)) {
241 if (startColumn
== 0) {
242 writeMetadataEvents(receiver
, sequenceID
, PREDICATE_IS_CONSENSUS_SEQUENCE
, W3CXSConstants
.DATA_TYPE_BOOLEAN
, new Boolean(true));
243 //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
245 writeStringPart(receiver
, getCDMSequence().getSequenceString(), startColumn
, endColumn
);
248 throw new IllegalArgumentException(String
.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND
, sequenceID
));
254 * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
257 public ObjectListDataAdapter
<TokenSetDefinitionEvent
> getTokenSets(ReadWriteParameterMap parameters
) {