Project

General

Profile

Download (12.2 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2016 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.taxeditor.molecular.io;
11

    
12

    
13
import info.bioinfweb.commons.bio.CharacterStateSetType;
14
import info.bioinfweb.commons.io.W3CXSConstants;
15
import info.bioinfweb.commons.text.StringUtils;
16
import info.bioinfweb.jphyloio.ReadWriteConstants;
17
import info.bioinfweb.jphyloio.ReadWriteParameterMap;
18
import info.bioinfweb.jphyloio.dataadapters.JPhyloIOEventReceiver;
19
import info.bioinfweb.jphyloio.dataadapters.MatrixDataAdapter;
20
import info.bioinfweb.jphyloio.dataadapters.ObjectListDataAdapter;
21
import info.bioinfweb.jphyloio.dataadapters.implementations.NoCharDefsNoSetsMatrixDataAdapter;
22
import info.bioinfweb.jphyloio.dataadapters.implementations.store.StoreObjectListDataAdapter;
23
import info.bioinfweb.jphyloio.events.CharacterSetIntervalEvent;
24
import info.bioinfweb.jphyloio.events.LinkedLabeledIDEvent;
25
import info.bioinfweb.jphyloio.events.SequenceTokensEvent;
26
import info.bioinfweb.jphyloio.events.TokenSetDefinitionEvent;
27
import info.bioinfweb.jphyloio.events.type.EventContentType;
28
import info.bioinfweb.jphyloio.utils.JPhyloIOWritingUtils;
29

    
30
import java.io.IOException;
31
import java.net.URI;
32
import java.util.ArrayList;
33
import java.util.Collections;
34
import java.util.Iterator;
35
import java.util.List;
36

    
37
import javax.xml.namespace.QName;
38

    
39
import eu.etaxonomy.cdm.model.molecular.Sequence;
40
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
41
import eu.etaxonomy.taxeditor.molecular.editor.AlignmentEditor;
42

    
43

    
44

    
45
/**
46
 * In implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
47
 * <p>
48
 * Note that this adapter stores information on the {@link Sequence} in creation. Modifying the sequence or linked
49
 * objects will require to create a new instance of this class. Otherwise unexpected behavior may be the consequence.
50
 *
51
 * @author Ben Stöver
52
 * @date 29.04.2016
53
 */
54
public class CDMSequenceMatrixAdapter extends NoCharDefsNoSetsMatrixDataAdapter implements ReadWriteConstants, SingleReadAlignmentRDFXMLConstants {
55
    public static final String MATRIX_ID = DEFAULT_MATRIX_ID_PREFIX + "ContigAlignment";
56
    public static final String SINGLE_READ_SEQUENCE_ID_PREFIX = DEFAULT_SEQUENCE_ID_PREFIX + "SingleRead";
57
    public static final String CONSENSUS_SEQUENCE_ID= DEFAULT_SEQUENCE_ID_PREFIX + "Consensus";
58

    
59

    
60
    private Sequence sequence;
61
    private boolean exportConsensus;
62
    private List<SingleReadAlignment> singleReadList;
63
    private String consensusSequenceLabel;
64
    private ObjectListDataAdapter<TokenSetDefinitionEvent> tokenSetList;
65
    //TODO Also allow specifying single read labels?
66

    
67

    
68
    /**
69
     * Creates a new instance of this class.
70
     *
71
     * @param sequence the <i>CDM</i> sequence object containing the data to be exported
72
     * @param consensusSequenceLabel the label to be used for the consensus sequence (Maybe {@code null}.)
73
     * @param exportConsensus Specify {@code true} here, if the consensus sequence shall be included in the export or {@code false}
74
     *        otherwise.
75
     * @param exportSingleReads Specify {@code true} here, if the single reads shall be included in the export or {@code false}
76
     *        otherwise.
77
     * @throws IllegalArgumentException if both {@code exportConsensus} and {@code exportSingleReads} are {@code false}
78
     */
79
    public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel, boolean exportConsensus, boolean exportSingleReads) {
80
        super();
81
        if (!exportConsensus && !exportSingleReads) {
82
            throw new IllegalArgumentException("Either exportConsensus or exportSingleReads must be true. "
83
                    + "Otherwise no sequences would be contained in this matrix.");
84
        }
85
        else {
86
            this.sequence = sequence;
87
            this.exportConsensus = exportConsensus;
88
            this.consensusSequenceLabel = consensusSequenceLabel;
89
            tokenSetList = createTokenSetList();
90
            if (exportSingleReads) {
91
                singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments());  // Store references of single reads in defined order to allow random access.
92
                        //TODO Omit single reads that do not have an edited sequence yet?
93
            }
94
            else {
95
                singleReadList = Collections.emptyList();
96
            }
97
        }
98
    }
99

    
100

    
101
    private ObjectListDataAdapter<TokenSetDefinitionEvent> createTokenSetList() {
102
        StoreObjectListDataAdapter<TokenSetDefinitionEvent> result = new StoreObjectListDataAdapter<TokenSetDefinitionEvent>();
103
        final String id = ReadWriteConstants.DEFAULT_TOKEN_SET_ID_PREFIX;
104
        result.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType.DNA, id, null));
105
        long length = getColumnCount(null);  //TODO Change this expression, if column count should return -1 in the future.
106
        if (length > 0) {  // Empty character set interval events are not allowed.  //TODO Handle -1 separately
107
            result.getObjectContent(id).add(new CharacterSetIntervalEvent(0, length));
108
        }
109
        return result;
110
    }
111

    
112

    
113
    /**
114
     * @return the sequence
115
     */
116
    public Sequence getCDMSequence() {
117
        return sequence;
118
    }
119

    
120

    
121
    private int extractSingleReadIndexFromID(String sequenceID) {
122
        if (sequenceID.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX)) {
123
            try {
124
                return Integer.parseInt(sequenceID.substring(SINGLE_READ_SEQUENCE_ID_PREFIX.length()));
125
            }
126
            catch (NumberFormatException e) {}  // fall through
127
        }
128
        return -1;
129
    }
130

    
131

    
132
    @Override
133
    public LinkedLabeledIDEvent getStartEvent(ReadWriteParameterMap parameters) {
134
        return new LinkedLabeledIDEvent(EventContentType.ALIGNMENT, MATRIX_ID, "Contig alignment", null);
135
                //TODO Use label according to derivate and markers.
136
    }
137

    
138

    
139
    @Override
140
    public boolean containsLongTokens(ReadWriteParameterMap parameters) {
141
        return false;
142
    }
143

    
144

    
145
    @Override
146
    public long getColumnCount(ReadWriteParameterMap parameters) {
147
        return -1;  // Indicates that sequences may have different lengths. (Otherwise writing files without sequence elongation would not be possible.)
148
    }
149

    
150

    
151
    @Override
152
    public long getSequenceCount(ReadWriteParameterMap parameters) {
153
        int addend = 1;
154
        if (!exportConsensus) {
155
            addend = 0;
156
        }
157
        return singleReadList.size() + addend;
158
    }
159

    
160

    
161
    @Override
162
    public Iterator<String> getSequenceIDIterator(ReadWriteParameterMap parameters) {
163
        return new SequenceIDIterator(singleReadList.size(), exportConsensus);
164
    }
165

    
166

    
167
    @Override
168
    public long getSequenceLength(ReadWriteParameterMap parameters, String sequenceID) throws IllegalArgumentException {
169
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
170
        if (singleReadIndex >= 0) {
171
            String sequence = singleReadList.get(singleReadIndex).getEditedSequence();
172
            if (sequence != null) {
173
                return sequence.length();
174
            }
175
            else {  // This would happen e.g. if not edited sequence was copied from pherogram yet.
176
                return 0;
177
            }
178
        }
179
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
180
            return getCDMSequence().getSequenceString().length();
181
        }
182
        else {
183
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
184
        }
185
    }
186

    
187

    
188
    @Override
189
    public LinkedLabeledIDEvent getSequenceStartEvent(ReadWriteParameterMap parameters, String sequenceID) {
190
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
191
        if (singleReadIndex >= 0) {
192
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, "Single read " + singleReadIndex, null);
193
                    //TODO Use name displayed in derivate hierarchy or specified name as label instead?
194
        }
195
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
196
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, consensusSequenceLabel, null);
197
        }
198
        else {
199
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
200
        }
201
    }
202

    
203

    
204
    private void writeStringPart(JPhyloIOEventReceiver receiver, String string, long startColumn, long endColumn) throws IOException {
205
        if (string != null) {
206
            receiver.add(new SequenceTokensEvent(StringUtils.charSequenceToStringList(
207
                    string.substring((int)startColumn, (int)endColumn))));
208
        }
209
    }
210

    
211

    
212
    private String createMetadataID(String sequenceID, QName predicate) {
213
        return sequenceID + "META" + predicate.getLocalPart();
214
    }
215

    
216

    
217
    private void writeMetadataEvents(JPhyloIOEventReceiver receiver, String sequenceID, QName predicate, QName dataType, Object objectValue)
218
            throws IOException {
219

    
220
        if (objectValue != null) {
221
            JPhyloIOWritingUtils.writeSimpleLiteralMetadata(receiver, createMetadataID(sequenceID, predicate), null,
222
                    predicate, dataType, objectValue, null);
223
        }
224
    }
225

    
226

    
227
    @Override
228
    public void writeSequencePartContentData(ReadWriteParameterMap parameters, JPhyloIOEventReceiver receiver, String sequenceID,
229
            long startColumn, long endColumn) throws IOException, IllegalArgumentException {
230

    
231
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
232
        if (singleReadIndex >= 0) {
233
            SingleReadAlignment singleRead = singleReadList.get(singleReadIndex);
234
            if (startColumn == 0) {
235
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_SINGLE_READ, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
236
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_REVERSE_COMPLEMENTED, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(singleRead.isReverseComplement()));
237
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_LEFT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getLeftCutPosition());
238
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_RIGHT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getRightCutPosition());
239

    
240
                URI pherogramURI = AlignmentEditor.getPherogramURI(singleRead.getSingleRead());
241
                if (pherogramURI != null) {
242
                    JPhyloIOWritingUtils.writeTerminalResourceMetadata(receiver, createMetadataID(sequenceID, PREDICATE_HAS_PHEROGRAM),
243
                            null, PREDICATE_HAS_PHEROGRAM, pherogramURI);
244
                }
245

    
246
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_PHEROGRAM_ALIGNMENT, DATA_TYPE_PHERORAGM_ALIGNMENT, singleRead.getShifts());
247
                        //TODO This needs to be written with sequence type XML!
248
            }
249
            writeStringPart(receiver, singleRead.getEditedSequence(), startColumn, endColumn);
250
        }
251
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
252
            if (startColumn == 0) {
253
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_CONSENSUS_SEQUENCE, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
254
                //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
255
            }
256
            writeStringPart(receiver, getCDMSequence().getSequenceString(), startColumn, endColumn);
257
        }
258
        else {
259
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
260
        }
261
    }
262

    
263

    
264
    /* (non-Javadoc)
265
     * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
266
     */
267
    @Override
268
    public ObjectListDataAdapter<TokenSetDefinitionEvent> getTokenSets(ReadWriteParameterMap parameters) {
269
        return tokenSetList;
270
    }
271
}
(2-2/4)