Merge branch 'release/5.19.0'
[taxeditor.git] / eu.etaxonomy.taxeditor.molecular / src / main / java / eu / etaxonomy / taxeditor / molecular / io / CDMSequenceMatrixAdapter.java
1 package eu.etaxonomy.taxeditor.molecular.io;
2
3 import java.io.IOException;
4 import java.util.ArrayList;
5 import java.util.Collections;
6 import java.util.Iterator;
7 import java.util.List;
8
9 import javax.xml.namespace.QName;
10
11 import eu.etaxonomy.cdm.common.URI;
12 import eu.etaxonomy.cdm.model.molecular.Sequence;
13 import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
14 import eu.etaxonomy.taxeditor.molecular.editor.e4.AlignmentEditorE4;
15 import eu.etaxonomy.taxeditor.molecular.l10n.Messages;
16 import info.bioinfweb.commons.bio.CharacterStateSetType;
17 import info.bioinfweb.commons.io.W3CXSConstants;
18 import info.bioinfweb.commons.text.StringUtils;
19 import info.bioinfweb.jphyloio.ReadWriteConstants;
20 import info.bioinfweb.jphyloio.ReadWriteParameterMap;
21 import info.bioinfweb.jphyloio.dataadapters.JPhyloIOEventReceiver;
22 import info.bioinfweb.jphyloio.dataadapters.MatrixDataAdapter;
23 import info.bioinfweb.jphyloio.dataadapters.ObjectListDataAdapter;
24 import info.bioinfweb.jphyloio.dataadapters.implementations.NoCharDefsNoSetsMatrixDataAdapter;
25 import info.bioinfweb.jphyloio.dataadapters.implementations.store.StoreObjectListDataAdapter;
26 import info.bioinfweb.jphyloio.events.CharacterSetIntervalEvent;
27 import info.bioinfweb.jphyloio.events.LinkedLabeledIDEvent;
28 import info.bioinfweb.jphyloio.events.SequenceTokensEvent;
29 import info.bioinfweb.jphyloio.events.TokenSetDefinitionEvent;
30 import info.bioinfweb.jphyloio.events.type.EventContentType;
31 import info.bioinfweb.jphyloio.utils.JPhyloIOWritingUtils;
32
33 /**
34 * In implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
35 * <p>
36 * Note that this adapter stores information on the {@link Sequence} in creation. Modifying the sequence or linked
37 * objects will require to create a new instance of this class. Otherwise unexpected behavior may be the consequence.
38 *
39 * @author Ben Stöver
40 * @date 29.04.2016
41 */
42 public class CDMSequenceMatrixAdapter extends NoCharDefsNoSetsMatrixDataAdapter implements ReadWriteConstants, SingleReadAlignmentRDFXMLConstants {
43
44 private static final String NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND = Messages.CDMSequenceMatrixAdapter_NO_SEQUENCE_FOUND;
45 public static final String MATRIX_ID = DEFAULT_MATRIX_ID_PREFIX + "ContigAlignment"; //$NON-NLS-1$
46 public static final String SINGLE_READ_SEQUENCE_ID_PREFIX = DEFAULT_SEQUENCE_ID_PREFIX + "SingleRead"; //$NON-NLS-1$
47 public static final String CONSENSUS_SEQUENCE_ID= DEFAULT_SEQUENCE_ID_PREFIX + "Consensus"; //$NON-NLS-1$
48
49
50 private Sequence sequence;
51 private boolean exportConsensus;
52 private List<SingleReadAlignment> singleReadList;
53 private String consensusSequenceLabel;
54 private ObjectListDataAdapter<TokenSetDefinitionEvent> tokenSetList;
55 //TODO Also allow specifying single read labels?
56
57
58 /**
59 * Creates a new instance of this class.
60 *
61 * @param sequence the <i>CDM</i> sequence object containing the data to be exported
62 * @param consensusSequenceLabel the label to be used for the consensus sequence (Maybe {@code null}.)
63 * @param exportConsensus Specify {@code true} here, if the consensus sequence shall be included in the export or {@code false}
64 * otherwise.
65 * @param exportSingleReads Specify {@code true} here, if the single reads shall be included in the export or {@code false}
66 * otherwise.
67 * @throws IllegalArgumentException if both {@code exportConsensus} and {@code exportSingleReads} are {@code false}
68 */
69 public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel, boolean exportConsensus, boolean exportSingleReads) {
70 super();
71 if (!exportConsensus && !exportSingleReads) {
72 throw new IllegalArgumentException(Messages.CDMSequenceMatrixAdapter_EITHER_MUST_BE_TRUE);
73 }
74 else {
75 this.sequence = sequence;
76 this.exportConsensus = exportConsensus;
77 this.consensusSequenceLabel = consensusSequenceLabel;
78 tokenSetList = createTokenSetList();
79 if (exportSingleReads) {
80 singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments()); // Store references of single reads in defined order to allow random access.
81 //TODO Omit single reads that do not have an edited sequence yet?
82 }
83 else {
84 singleReadList = Collections.emptyList();
85 }
86 }
87 }
88
89
90 private ObjectListDataAdapter<TokenSetDefinitionEvent> createTokenSetList() {
91 StoreObjectListDataAdapter<TokenSetDefinitionEvent> result = new StoreObjectListDataAdapter<TokenSetDefinitionEvent>();
92 final String id = ReadWriteConstants.DEFAULT_TOKEN_SET_ID_PREFIX;
93 result.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType.DNA, id, null));
94 long length = getColumnCount(null); //TODO Change this expression, if column count should return -1 in the future.
95 if (length > 0) { // Empty character set interval events are not allowed. //TODO Handle -1 separately
96 result.getObjectContent(id).add(new CharacterSetIntervalEvent(0, length));
97 }
98 return result;
99 }
100
101
102 /**
103 * @return the sequence
104 */
105 public Sequence getCDMSequence() {
106 return sequence;
107 }
108
109
110 private int extractSingleReadIndexFromID(String sequenceID) {
111 if (sequenceID.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX)) {
112 try {
113 return Integer.parseInt(sequenceID.substring(SINGLE_READ_SEQUENCE_ID_PREFIX.length()));
114 }
115 catch (NumberFormatException e) {} // fall through
116 }
117 return -1;
118 }
119
120
121 @Override
122 public LinkedLabeledIDEvent getStartEvent(ReadWriteParameterMap parameters) {
123 return new LinkedLabeledIDEvent(EventContentType.ALIGNMENT, MATRIX_ID, "Contig alignment", null); //$NON-NLS-1$
124 //TODO Use label according to derivate and markers.
125 }
126
127
128 @Override
129 public boolean containsLongTokens(ReadWriteParameterMap parameters) {
130 return false;
131 }
132
133
134 @Override
135 public long getColumnCount(ReadWriteParameterMap parameters) {
136 return -1; // Indicates that sequences may have different lengths. (Otherwise writing files without sequence elongation would not be possible.)
137 }
138
139
140 @Override
141 public long getSequenceCount(ReadWriteParameterMap parameters) {
142 int addend = 1;
143 if (!exportConsensus) {
144 addend = 0;
145 }
146 return singleReadList.size() + addend;
147 }
148
149
150 @Override
151 public Iterator<String> getSequenceIDIterator(ReadWriteParameterMap parameters) {
152 return new SequenceIDIterator(singleReadList.size(), exportConsensus);
153 }
154
155
156 @Override
157 public long getSequenceLength(ReadWriteParameterMap parameters, String sequenceID) throws IllegalArgumentException {
158 int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
159 if (singleReadIndex >= 0) {
160 String sequence = singleReadList.get(singleReadIndex).getEditedSequence();
161 if (sequence != null) {
162 return sequence.length();
163 }
164 else { // This would happen e.g. if not edited sequence was copied from pherogram yet.
165 return 0;
166 }
167 }
168 else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
169 return getCDMSequence().getSequenceString().length();
170 }
171 else {
172 throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
173 }
174 }
175
176
177 @Override
178 public LinkedLabeledIDEvent getSequenceStartEvent(ReadWriteParameterMap parameters, String sequenceID) {
179 int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
180 if (singleReadIndex >= 0) {
181 return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, "Single read " + singleReadIndex, null); //$NON-NLS-1$
182 //TODO Use name displayed in derivate hierarchy or specified name as label instead?
183 }
184 else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
185 return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, consensusSequenceLabel, null);
186 }
187 else {
188 throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
189 }
190 }
191
192
193 private void writeStringPart(JPhyloIOEventReceiver receiver, String string, long startColumn, long endColumn) throws IOException {
194 if (string != null) {
195 receiver.add(new SequenceTokensEvent(StringUtils.charSequenceToStringList(
196 string.substring((int)startColumn, (int)endColumn))));
197 }
198 }
199
200
201 private String createMetadataID(String sequenceID, QName predicate) {
202 return sequenceID + "META" + predicate.getLocalPart(); //$NON-NLS-1$
203 }
204
205
206 private void writeMetadataEvents(JPhyloIOEventReceiver receiver, String sequenceID, QName predicate, QName dataType, Object objectValue)
207 throws IOException {
208
209 if (objectValue != null) {
210 JPhyloIOWritingUtils.writeSimpleLiteralMetadata(receiver, createMetadataID(sequenceID, predicate), null,
211 predicate, dataType, objectValue, null);
212 }
213 }
214
215
216 @Override
217 public void writeSequencePartContentData(ReadWriteParameterMap parameters, JPhyloIOEventReceiver receiver, String sequenceID,
218 long startColumn, long endColumn) throws IOException, IllegalArgumentException {
219
220 int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
221 if (singleReadIndex >= 0) {
222 SingleReadAlignment singleRead = singleReadList.get(singleReadIndex);
223 if (startColumn == 0) {
224 writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_SINGLE_READ, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
225 writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_REVERSE_COMPLEMENTED, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(singleRead.isReverseComplement()));
226 writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_LEFT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getLeftCutPosition());
227 writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_RIGHT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getRightCutPosition());
228
229 URI pherogramURI = AlignmentEditorE4.getPherogramURI(singleRead.getSingleRead());
230 if (pherogramURI != null) {
231 JPhyloIOWritingUtils.writeTerminalResourceMetadata(receiver, createMetadataID(sequenceID, PREDICATE_HAS_PHEROGRAM),
232 null, PREDICATE_HAS_PHEROGRAM, pherogramURI.getJavaUri());
233 }
234
235 writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_PHEROGRAM_ALIGNMENT, DATA_TYPE_PHERORAGM_ALIGNMENT, singleRead.getShifts());
236 //TODO This needs to be written with sequence type XML!
237 }
238 writeStringPart(receiver, singleRead.getEditedSequence(), startColumn, endColumn);
239 }
240 else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
241 if (startColumn == 0) {
242 writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_CONSENSUS_SEQUENCE, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
243 //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
244 }
245 writeStringPart(receiver, getCDMSequence().getSequenceString(), startColumn, endColumn);
246 }
247 else {
248 throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
249 }
250 }
251
252
253 /* (non-Javadoc)
254 * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
255 */
256 @Override
257 public ObjectListDataAdapter<TokenSetDefinitionEvent> getTokenSets(ReadWriteParameterMap parameters) {
258 return tokenSetList;
259 }
260 }