Project

General

Profile

Download (11 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2016 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.taxeditor.molecular.io;
11

    
12

    
13
import info.bioinfweb.commons.bio.CharacterStateSetType;
14
import info.bioinfweb.commons.io.W3CXSConstants;
15
import info.bioinfweb.commons.text.StringUtils;
16
import info.bioinfweb.jphyloio.ReadWriteConstants;
17
import info.bioinfweb.jphyloio.ReadWriteParameterMap;
18
import info.bioinfweb.jphyloio.dataadapters.JPhyloIOEventReceiver;
19
import info.bioinfweb.jphyloio.dataadapters.MatrixDataAdapter;
20
import info.bioinfweb.jphyloio.dataadapters.ObjectListDataAdapter;
21
import info.bioinfweb.jphyloio.dataadapters.implementations.NoCharDefsNoSetsMatrixDataAdapter;
22
import info.bioinfweb.jphyloio.dataadapters.implementations.store.StoreObjectListDataAdapter;
23
import info.bioinfweb.jphyloio.events.CharacterSetIntervalEvent;
24
import info.bioinfweb.jphyloio.events.LinkedLabeledIDEvent;
25
import info.bioinfweb.jphyloio.events.SequenceTokensEvent;
26
import info.bioinfweb.jphyloio.events.TokenSetDefinitionEvent;
27
import info.bioinfweb.jphyloio.events.type.EventContentType;
28
import info.bioinfweb.jphyloio.utils.JPhyloIOWritingUtils;
29

    
30
import java.io.IOException;
31
import java.net.URI;
32
import java.util.ArrayList;
33
import java.util.Iterator;
34
import java.util.List;
35

    
36
import javax.xml.namespace.QName;
37

    
38
import eu.etaxonomy.cdm.model.molecular.Sequence;
39
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
40
import eu.etaxonomy.taxeditor.molecular.editor.AlignmentEditor;
41

    
42

    
43

    
44
/**
45
 * An implementation of {@link MatrixDataAdapter} that delegates to a {@link Sequence} object.
46
 * <p>
47
 * Note that this adapter stores information on the {@link Sequence} at creation time. Modifying the sequence or linked
48
 * objects requires creating a new instance of this class. Otherwise, unexpected behavior may result.
49
 *
50
 * @author Ben Stöver
51
 * @date 29.04.2016
52
 */
53
public class CDMSequenceMatrixAdapter extends NoCharDefsNoSetsMatrixDataAdapter implements ReadWriteConstants, SingleReadAlignmentRDFXMLConstants {
54
    public static final String MATRIX_ID = DEFAULT_MATRIX_ID_PREFIX + "ContigAlignment";
55
    public static final String SINGLE_READ_SEQUENCE_ID_PREFIX = DEFAULT_SEQUENCE_ID_PREFIX + "SingleRead";
56
    public static final String CONSENSUS_SEQUENCE_ID= DEFAULT_SEQUENCE_ID_PREFIX + "Consensus";
57

    
58

    
59
    private Sequence sequence;
60
    private List<SingleReadAlignment> singleReadList;
61
    private String consensusSequenceLabel;
62
    private ObjectListDataAdapter<TokenSetDefinitionEvent> tokenSetList;
63
    //TODO Also allow specifying single read labels?
64

    
65

    
66
    /**
67
     * Creates a new instance of this class.
68
     *
69
     * @param sequence
70
     * @param consensusSequenceLabel
71
     */
72
    public CDMSequenceMatrixAdapter(Sequence sequence, String consensusSequenceLabel) {
73
        super();
74
        this.sequence = sequence;
75
        this.consensusSequenceLabel = consensusSequenceLabel;
76
        tokenSetList = createTokenSetList();
77
        singleReadList = new ArrayList<SingleReadAlignment>(sequence.getSingleReadAlignments());  // Store references of single reads in defined order to allow random access.
78
                //TODO Omit single reads that do not have an edited sequence yet?
79
    }
80

    
81

    
82
    private ObjectListDataAdapter<TokenSetDefinitionEvent> createTokenSetList() {
83
        StoreObjectListDataAdapter<TokenSetDefinitionEvent> result = new StoreObjectListDataAdapter<TokenSetDefinitionEvent>();
84
        final String id = ReadWriteConstants.DEFAULT_TOKEN_SET_ID_PREFIX;
85
        result.setObjectStartEvent(new TokenSetDefinitionEvent(CharacterStateSetType.DNA, id, null));
86
        long length = getColumnCount(null);  //TODO Change this expression, if column count should return -1 in the future.
87
        if (length > 0) {  // Empty character set interval events are not allowed.  //TODO Handle -1 separately
88
            result.getObjectContent(id).add(new CharacterSetIntervalEvent(0, length));
89
        }
90
        return result;
91
    }
92

    
93

    
94
    /**
95
     * @return the sequence
96
     */
97
    public Sequence getCDMSequence() {
98
        return sequence;
99
    }
100

    
101

    
102
    private int extractSingleReadIndexFromID(String sequenceID) {
103
        if (sequenceID.startsWith(SINGLE_READ_SEQUENCE_ID_PREFIX)) {
104
            try {
105
                return Integer.parseInt(sequenceID.substring(SINGLE_READ_SEQUENCE_ID_PREFIX.length()));
106
            }
107
            catch (NumberFormatException e) {}  // fall through
108
        }
109
        return -1;
110
    }
111

    
112

    
113
    @Override
114
    public LinkedLabeledIDEvent getStartEvent(ReadWriteParameterMap parameters) {
115
        return new LinkedLabeledIDEvent(EventContentType.ALIGNMENT, MATRIX_ID, "Contig alignment", null);
116
                //TODO Use label according to derivate and markers.
117
    }
118

    
119

    
120
    @Override
121
    public boolean containsLongTokens(ReadWriteParameterMap parameters) {
122
        return false;
123
    }
124

    
125

    
126
    @Override
127
    public long getColumnCount(ReadWriteParameterMap parameters) {
128
        return getCDMSequence().getSequenceString().length();  //TODO Consider that parts of single reads may lie outside of the current consensus sequence. Possibly return -1 in the future.
129
    }
130

    
131

    
132
    @Override
133
    public long getSequenceCount(ReadWriteParameterMap parameters) {
134
        return singleReadList.size() + 1;  // The last sequence is the consensus sequence.
135
    }
136

    
137

    
138
    @Override
139
    public Iterator<String> getSequenceIDIterator(ReadWriteParameterMap parameters) {
140
        return new SequenceIDIterator(getCDMSequence().getSingleReadAlignments().size());
141
    }
142

    
143

    
144
    @Override
145
    public long getSequenceLength(ReadWriteParameterMap parameters, String sequenceID) throws IllegalArgumentException {
146
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
147
        if (singleReadIndex >= 0) {
148
            String sequence = singleReadList.get(singleReadIndex).getEditedSequence();
149
            if (sequence != null) {
150
                return sequence.length();
151
            }
152
            else {  // This would happen e.g. if not edited sequence was copied from pherogram yet.
153
                return 0;
154
            }
155
        }
156
        else if (CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
157
            return getCDMSequence().getSequenceString().length();
158
        }
159
        else {
160
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
161
        }
162
    }
163

    
164

    
165
    @Override
166
    public LinkedLabeledIDEvent getSequenceStartEvent(ReadWriteParameterMap parameters, String sequenceID) {
167
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
168
        if (singleReadIndex >= 0) {
169
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, "Single read " + singleReadIndex, null);
170
                    //TODO Use name displayed in derivate hierarchy or specified name as label instead?
171
        }
172
        else if (CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
173
            return new LinkedLabeledIDEvent(EventContentType.SEQUENCE, sequenceID, consensusSequenceLabel, null);
174
        }
175
        else {
176
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
177
        }
178
    }
179

    
180

    
181
    private void writeStringPart(JPhyloIOEventReceiver receiver, String string, long startColumn, long endColumn) throws IOException {
182
        if (string != null) {
183
            receiver.add(new SequenceTokensEvent(StringUtils.charSequenceToStringList(
184
                    string.substring((int)startColumn, (int)endColumn))));
185
        }
186
    }
187

    
188

    
189
    private String createMetadataID(String sequenceID, QName predicate) {
190
        return sequenceID + "META" + predicate.getLocalPart();
191
    }
192

    
193

    
194
    private void writeMetadataEvents(JPhyloIOEventReceiver receiver, String sequenceID, QName predicate, QName dataType, Object objectValue)
195
            throws IOException {
196

    
197
        if (objectValue != null) {
198
            JPhyloIOWritingUtils.writeSimpleLiteralMetadata(receiver, createMetadataID(sequenceID, predicate), null,
199
                    predicate, dataType, objectValue, null);
200
        }
201
    }
202

    
203

    
204
    @Override
205
    public void writeSequencePartContentData(ReadWriteParameterMap parameters, JPhyloIOEventReceiver receiver, String sequenceID,
206
            long startColumn, long endColumn) throws IOException, IllegalArgumentException {
207

    
208
        int singleReadIndex = extractSingleReadIndexFromID(sequenceID);
209
        if (singleReadIndex >= 0) {
210
            SingleReadAlignment singleRead = singleReadList.get(singleReadIndex);
211
            if (startColumn == 0) {
212
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_SINGLE_READ, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
213
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_REVERSE_COMPLEMENTED, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(singleRead.isReverseComplement()));
214
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_LEFT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getLeftCutPosition());
215
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_RIGHT_CUT_POSITION, W3CXSConstants.DATA_TYPE_INT, singleRead.getRightCutPosition());
216

    
217
                URI pherogramURI = AlignmentEditor.getPherogramURI(singleRead.getSingleRead());
218
                if (pherogramURI != null) {
219
                    JPhyloIOWritingUtils.writeTerminalResourceMetadata(receiver, createMetadataID(sequenceID, PREDICATE_HAS_PHEROGRAM),
220
                            null, PREDICATE_HAS_PHEROGRAM, pherogramURI);
221
                }
222

    
223
                writeMetadataEvents(receiver, sequenceID, PREDICATE_HAS_PHEROGRAM_ALIGNMENT, DATA_TYPE_PHERORAGM_ALIGNMENT, singleRead.getShifts());
224
                        //TODO This needs to be written with sequence type XML!
225
            }
226
            writeStringPart(receiver, singleRead.getEditedSequence(), startColumn, endColumn);
227
        }
228
        else if (CONSENSUS_SEQUENCE_ID.equals(sequenceID)) {
229
            if (startColumn == 0) {
230
                writeMetadataEvents(receiver, sequenceID, PREDICATE_IS_CONSENSUS_SEQUENCE, W3CXSConstants.DATA_TYPE_BOOLEAN, new Boolean(true));
231
                //TODO Possibly export additional properties of sequence (e.g. isBarcode(), getDdbjId(), ...) as metadata?
232
            }
233
            writeStringPart(receiver, getCDMSequence().getSequenceString(), startColumn, endColumn);
234
        }
235
        else {
236
            throw new IllegalArgumentException("No sequence with the ID \"" + sequenceID + "\" could be found.");
237
        }
238
    }
239

    
240

    
241
    /* (non-Javadoc)
242
     * @see info.bioinfweb.jphyloio.dataadapters.implementations.NoSetsMatrixDataAdapter#getTokenSets()
243
     */
244
    @Override
245
    public ObjectListDataAdapter<TokenSetDefinitionEvent> getTokenSets(ReadWriteParameterMap parameters) {
246
        return tokenSetList;
247
    }
248
}
(2-2/4)