Project

General

Profile

Download (12.4 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2016 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.taxeditor.molecular.io;
11

    
12

    
13
import java.io.IOException;
14
import java.net.URI;
15
import java.util.ArrayList;
16
import java.util.Collections;
17
import java.util.Iterator;
18
import java.util.List;
19

    
20
import javax.xml.namespace.QName;
21

    
22
import eu.etaxonomy.cdm.model.molecular.Sequence;
23
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
24
import eu.etaxonomy.taxeditor.molecular.editor.e4.AlignmentEditorE4;
25
import eu.etaxonomy.taxeditor.molecular.l10n.Messages;
26
import info.bioinfweb.commons.bio.CharacterStateSetType;
27
import info.bioinfweb.commons.io.W3CXSConstants;
28
import info.bioinfweb.commons.text.StringUtils;
29
import info.bioinfweb.jphyloio.ReadWriteConstants;
30
import info.bioinfweb.jphyloio.ReadWriteParameterMap;
31
import info.bioinfweb.jphyloio.dataadapters.JPhyloIOEventReceiver;
32
import info.bioinfweb.jphyloio.dataadapters.MatrixDataAdapter;
33
import info.bioinfweb.jphyloio.dataadapters.ObjectListDataAdapter;
34
import info.bioinfweb.jphyloio.dataadapters.implementations.NoCharDefsNoSetsMatrixDataAdapter;
35
import info.bioinfweb.jphyloio.dataadapters.implementations.store.StoreObjectListDataAdapter;
36
import info.bioinfweb.jphyloio.events.CharacterSetIntervalEvent;
37
import info.bioinfweb.jphyloio.events.LinkedLabeledIDEvent;
38
import info.bioinfweb.jphyloio.events.SequenceTokensEvent;
39
import info.bioinfweb.jphyloio.events.TokenSetDefinitionEvent;
40
import info.bioinfweb.jphyloio.events.type.EventContentType;
41
import info.bioinfweb.jphyloio.utils.JPhyloIOWritingUtils;
42

    
43

    
44

    
45
/**
46
 * In implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
47
 * <p>
48
 * Note that this adapter stores information on the {@link Sequence} in creation. Modifying the sequence or linked
49
 * objects will require to create a new instance of this class. Otherwise unexpected behavior may be the consequence.
50
 *
51
 * @author Ben Stöver
52
 * @date 29.04.2016
53
 */
54
public class CDMSequenceMatrixAdapter extends NoCharDefsNoSetsMatrixDataAdapter implements ReadWriteConstants, SingleReadAlignmentRDFXMLConstants {
55

    
56
    private static final String NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND = Messages.CDMSequenceMatrixAdapter_NO_SEQUENCE_FOUND;
57
    public static final String MATRIX_ID = DEFAULT_MATRIX_ID_PREFIX + "ContigAlignment"; //$NON-NLS-1$
58
    public static final String SINGLE_READ_SEQUENCE_ID_PREFIX = DEFAULT_SEQUENCE_ID_PREFIX + "SingleRead"; //$NON-NLS-1$
59
    public static final String CONSENSUS_SEQUENCE_ID= DEFAULT_SEQUENCE_ID_PREFIX + "Consensus"; //$NON-NLS-1$
60

    
61

    
62
    private Sequence sequence;
63
    private boolean exportConsensus;
64
    private List<SingleReadAlignment> singleReadList;
65
    private String consensusSequenceLabel;
66
    private ObjectListDataAdapter<TokenSetDefinitionEvent> tokenSetList;
67
    //TODO Also allow specifying single read labels?
68

    
69

    
70
    /**
71
     * Creates a new instance of this class.
72
     *
73
     * @param sequence the <i>CDM</i> sequence object containing the data to be exported
74
     * @param consensusSequenceLabel the label to be used for the consensus sequence (Maybe {@code null}.)
75
     * @param exportConsensus Specify {@code true} here, if the consensus sequence shall be included in the export or {@code false}
76
     *        otherwise.
77
     * @param exportSingleReads Specify {@code true} here, if the single reads shall be included in the export or {@code false}
78
     *        otherwise.
79
     * @throws IllegalArgumentException if both {@code exportConsensus} and {@code exportSingleReads} are {@code false}
80
     */
81
    public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel, boolean exportConsensus, boolean exportSingleReads) {
82
        super();
83
        if (!exportConsensus && !exportSingleReads) {
84
            throw new IllegalArgumentException(Messages.CDMSequenceMatrixAdapter_EITHER_MUST_BE_TRUE);
85
        }
86
        else {
87
            this.sequence = sequence;
88
            this.exportConsensus = exportConsensus;
89
            this.consensusSequenceLabel = consensusSequenceLabel;
90
            tokenSetList = createTokenSetList();
91
            if (exportSingleReads) {
92
                singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments());  // Store references of single reads in defined order to allow random access.
93
                        //TODO Omit single reads that do not have an edited sequence yet?
94
            }
95
            else {
96
                singleReadList = Collections.emptyList();
97
            }
98
        }
99
    }
100

    
101

    
102
    private ObjectListDataAdapter<TokenSetDefinitionEvent> createTokenSetList() {
103
        StoreObjectListDataAdapter<TokenSetDefinitionEvent> result = new StoreObjectListDataAdapter<TokenSetDefinitionEvent>();
104
        final String id = ReadWriteConstants.DEFAULT_TOKEN_SET_ID_PREFIX;
105
        result.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType.DNA, id, null));
106
        long length = getColumnCount(null);  //TODO Change this expression, if column count should return -1 in the future.
107
        if (length > 0) {  // Empty character set interval events are not allowed.  //TODO Handle -1 separately
108
            result.getObjectContent(id).add(new CharacterSetIntervalEvent(0, length));
109
        }
110
        return result;
111
    }
112

    
113

    
114
    /**
115
     * @return the sequence
116
     */
117
    public Sequence getCDMSequence() {
118
        return sequence;
119
    }
120

    
121

    
122
    private int extractSingleReadIndexFromID(String sequenceID) {
123
        if (sequenceID.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX)) {
124
            try {
125
                return Integer.parseInt(sequenceID.substring(SINGLE_READ_SEQUENCE_ID_PREFIX.length()));
126
            }
127
            catch (NumberFormatException e) {}  // fall through
128
        }
129
        return -1;
130
    }
131

    
132

    
133
    @Override
134
    public LinkedLabeledIDEvent getStartEvent(ReadWriteParameterMap parameters) {
135
        return new LinkedLabeledIDEvent(EventContentType.ALIGNMENT, MATRIX_ID, "Contig alignment", null); //$NON-NLS-1$
136
                //TODO Use label according to derivate and markers.
137
    }
138

    
139

    
140
    @Override
141
    public boolean containsLongTokens(ReadWriteParameterMap parameters) {
142
        return false;
143
    }
144

    
145

    
146
    @Override
147
    public long getColumnCount(ReadWriteParameterMap parameters) {
148
        return -1;  // Indicates that sequences may have different lengths. (Otherwise writing files without sequence elongation would not be possible.)
149
    }
150

    
151

    
152
    @Override
153
    public long getSequenceCount(ReadWriteParameterMap parameters) {
154
        int addend = 1;
155
        if (!exportConsensus) {
156
            addend = 0;
157
        }
158
        return singleReadList.size() + addend;
159
    }
160

    
161

    
162
    @Override
163
    public Iterator<String> getSequenceIDIterator(ReadWriteParameterMap parameters) {
164
        return new SequenceIDIterator(singleReadList.size(), exportConsensus);
165
    }
166

    
167

    
168
    @Override
169
    public long getSequenceLength(ReadWriteParameterMap parameters, String sequenceID) throws IllegalArgumentException {
170
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
171
        if (singleReadIndex >= 0) {
172
            String sequence = singleReadList.get(singleReadIndex).getEditedSequence();
173
            if (sequence != null) {
174
                return sequence.length();
175
            }
176
            else {  // This would happen e.g. if not edited sequence was copied from pherogram yet.
177
                return 0;
178
            }
179
        }
180
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
181
            return getCDMSequence().getSequenceString().length();
182
        }
183
        else {
184
            throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
185
        }
186
    }
187

    
188

    
189
    @Override
190
    public LinkedLabeledIDEvent getSequenceStartEvent(ReadWriteParameterMap parameters, String sequenceID) {
191
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
192
        if (singleReadIndex >= 0) {
193
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, "Single read " + singleReadIndex, null); //$NON-NLS-1$
194
                    //TODO Use name displayed in derivate hierarchy or specified name as label instead?
195
        }
196
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
197
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, consensusSequenceLabel, null);
198
        }
199
        else {
200
            throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
201
        }
202
    }
203

    
204

    
205
    private void writeStringPart(JPhyloIOEventReceiver receiver, String string, long startColumn, long endColumn) throws IOException {
206
        if (string != null) {
207
            receiver.add(new SequenceTokensEvent(StringUtils.charSequenceToStringList(
208
                    string.substring((int)startColumn, (int)endColumn))));
209
        }
210
    }
211

    
212

    
213
    private String createMetadataID(String sequenceID, QName predicate) {
214
        return sequenceID + "META" + predicate.getLocalPart(); //$NON-NLS-1$
215
    }
216

    
217

    
218
    private void writeMetadataEvents(JPhyloIOEventReceiver receiver, String sequenceID, QName predicate, QName dataType, Object objectValue)
219
            throws IOException {
220

    
221
        if (objectValue != null) {
222
            JPhyloIOWritingUtils.writeSimpleLiteralMetadata(receiver, createMetadataID(sequenceID, predicate), null,
223
                    predicate, dataType, objectValue, null);
224
        }
225
    }
226

    
227

    
228
    @Override
229
    public void writeSequencePartContentData(ReadWriteParameterMap parameters, JPhyloIOEventReceiver receiver, String sequenceID,
230
            long startColumn, long endColumn) throws IOException, IllegalArgumentException {
231

    
232
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
233
        if (singleReadIndex >= 0) {
234
            SingleReadAlignment singleRead = singleReadList.get(singleReadIndex);
235
            if (startColumn == 0) {
236
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_SINGLE_READ, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
237
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_REVERSE_COMPLEMENTED, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(singleRead.isReverseComplement()));
238
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_LEFT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getLeftCutPosition());
239
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_RIGHT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getRightCutPosition());
240

    
241
                URI pherogramURI = AlignmentEditorE4.getPherogramURI(singleRead.getSingleRead());
242
                if (pherogramURI != null) {
243
                    JPhyloIOWritingUtils.writeTerminalResourceMetadata(receiver, createMetadataID(sequenceID, PREDICATE_HAS_PHEROGRAM),
244
                            null, PREDICATE_HAS_PHEROGRAM, pherogramURI);
245
                }
246

    
247
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_PHEROGRAM_ALIGNMENT, DATA_TYPE_PHERORAGM_ALIGNMENT, singleRead.getShifts());
248
                        //TODO This needs to be written with sequence type XML!
249
            }
250
            writeStringPart(receiver, singleRead.getEditedSequence(), startColumn, endColumn);
251
        }
252
        else if (exportConsensus && CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
253
            if (startColumn == 0) {
254
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_CONSENSUS_SEQUENCE, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
255
                //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
256
            }
257
            writeStringPart(receiver, getCDMSequence().getSequenceString(), startColumn, endColumn);
258
        }
259
        else {
260
            throw new IllegalArgumentException(String.format(NO_SEQUENCE_WITH_THE_ID_D_COULD_BE_FOUND, sequenceID));
261
        }
262
    }
263

    
264

    
265
    /* (non-Javadoc)
266
     * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
267
     */
268
    @Override
269
    public ObjectListDataAdapter<TokenSetDefinitionEvent> getTokenSets(ReadWriteParameterMap parameters) {
270
        return tokenSetList;
271
    }
272
}
(2-2/4)