Project

General

Profile

Download (21.4 KB) Statistics
| Branch: | Tag: | Revision:
/**
* Copyright (C) 2015 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.io.specimen.abcd206.in.molecular;
10

    
11
import java.net.URI;
12
import java.util.List;
13
import java.util.UUID;
14

    
15
import org.apache.log4j.Logger;
16
import org.w3c.dom.Element;
17
import org.w3c.dom.Node;
18
import org.w3c.dom.NodeList;
19

    
20
import eu.etaxonomy.cdm.api.application.ICdmApplicationConfiguration;
21
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
22
import eu.etaxonomy.cdm.io.specimen.abcd206.in.AbcdParseUtility;
23
import eu.etaxonomy.cdm.io.specimen.abcd206.in.SpecimenImportReport;
24
import eu.etaxonomy.cdm.model.common.DefinedTerm;
25
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
26
import eu.etaxonomy.cdm.model.common.OrderedTerm;
27
import eu.etaxonomy.cdm.model.media.Media;
28
import eu.etaxonomy.cdm.model.molecular.Amplification;
29
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
30
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
31
import eu.etaxonomy.cdm.model.molecular.DnaSample;
32
import eu.etaxonomy.cdm.model.molecular.Primer;
33
import eu.etaxonomy.cdm.model.molecular.Sequence;
34
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
35
import eu.etaxonomy.cdm.model.molecular.SequenceString;
36
import eu.etaxonomy.cdm.model.molecular.SingleRead;
37
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
38
import eu.etaxonomy.cdm.model.reference.Reference;
39
import eu.etaxonomy.cdm.persistence.query.MatchMode;
40

    
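/**
 * Parses the elements carrying the "ggbn:" prefix during the ABCD 2.06 specimen import
 * and adds the parsed data to a DnaSample.
 *
 * @author pplitzner
 * @date Mar 4, 2015
 *
 */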
46
public class AbcdGgbnParser {
47

    
48
    //DNA Quality term
49
    private static final String HIGH = "high";
50
    private static final String MEDIUM = "medium";
51
    private static final String LOW = "low";
52
    private static final UUID HIGH_QUALITY_TERM = UUID.fromString("ec443c76-5987-4ec5-a66b-da207f70b47f");
53
    private static final UUID MEDIUM_QUALITY_TERM = UUID.fromString("2a174892-1246-4807-9022-71ce8639346b");
54
    private static final UUID LOW_QUALITY_TERM = UUID.fromString("a3bf12ff-b041-425f-bdaa-aa51da65eebc");
55

    
56
    private static final String FORWARD = "forward";
57

    
58
    private static final String REVERSE = "reverse";
59

    
60
    private static final Logger logger = Logger.getLogger(AbcdGgbnParser.class);
61

    
62
    private final String prefix = "ggbn:";
63

    
64
    private final SpecimenImportReport report;
65

    
66
    private final ICdmApplicationConfiguration cdmAppController;
67

    
68
    /**
     * Creates a parser that works with the given report and application configuration.
     *
     * @param report the import report that is handed to the parse methods of AbcdParseUtility
     * @param cdmAppController used to access the term, amplification and primer services
     */
    public AbcdGgbnParser(SpecimenImportReport report, ICdmApplicationConfiguration cdmAppController) {
        this.cdmAppController = cdmAppController;
        this.report = report;
    }
72

    
73
    public DnaSample parse(NodeList ggbn, DnaSample dnaSample, Abcd206ImportState state) {
74

    
75
        for(int i=0;i<ggbn.getLength();i++){
76
            Node item = ggbn.item(i);
77
            if(item instanceof Element){
78
                Element element = (Element) item;
79
                NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
80
                NodeList volumeList = element.getElementsByTagName(prefix+"volume");
81
                NodeList weightList = element.getElementsByTagName(prefix+"weight");
82
                NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
83
                NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
84
                NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
85
                NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
86
                NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
87
                NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
88
                NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
89

    
90
                dnaSample.setDnaQuality(parseDnaQuality(element, state));
91

    
92
                parseGelImage(gelImageList, state);
93
                parseAmplifications(amplificationsList, dnaSample, state);
94
            }
95
        }
96
        return dnaSample;
97
    }
98

    
99
    private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
100
        DnaQuality dnaQuality = DnaQuality.NewInstance();
101

    
102
        NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
103
        String purificationMethod = AbcdParseUtility.parseFirstTextContent(purificationMethodList);
104
        dnaQuality.setPurificationMethod(purificationMethod);
105

    
106
        NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
107
        if(concentrationList.getLength()==1){
108
            Node concentration = concentrationList.item(0);
109
            dnaQuality.setConcentration(AbcdParseUtility.parseDouble(concentration, report));
110
            if(concentration instanceof Element){
111
                String unit = ((Element) concentration).getAttribute("Unit");
112
                //TODO
113
//                dnaQuality.setConcentrationUnit(concentrationUnit)
114
            }
115
        }
116

    
117
        NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
118
        dnaQuality.setRatioOfAbsorbance260_280(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_280List, report));
119

    
120
        NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
121
        dnaQuality.setRatioOfAbsorbance260_230(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_230List, report));
122

    
123
        NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
124
        if(qualityCheckDateList.item(0)!=null){
125
            dnaQuality.setQualityCheckDate(AbcdParseUtility.parseFirstDateTime(qualityCheckDateList));
126
        }
127

    
128
        NodeList qualityList = element.getElementsByTagName(prefix+"quality");
129
        String quality = AbcdParseUtility.parseFirstTextContent(qualityList);
130
        if(LOW.equals(quality)){
131
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(LOW_QUALITY_TERM));
132
        }
133
        else if(MEDIUM.equals(quality)){
134
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(MEDIUM_QUALITY_TERM));
135
        }
136
        else if(HIGH.equals(quality)){
137
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(HIGH_QUALITY_TERM));
138
        }
139

    
140
        NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
141

    
142

    
143
        return dnaQuality;
144
    }
145

    
146
    private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
147
        if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
148
            Element gelImage = (Element)gelImageList.item(0);
149
            NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
150
            NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
151
            NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
152
            NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
153
            NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
154
            NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
155
            NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
156

    
157
        }
158

    
159
    }
160

    
161
    private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
162
        if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
163
            AmplificationResult amplificationResult = AmplificationResult.NewInstance();
164
            Amplification amplification = Amplification.NewInstance();
165
            NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
166
            for(int i=0;i<amplificationList.getLength();i++){
167
                if(amplificationList.item(i) instanceof Element){
168
                    Element amplificationElement = (Element)amplificationList.item(i);
169
                    NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
170
                    NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
171

    
172
                    //amplification dna marker
173
                    NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
174
                    if(markerList.item(0)!=null){
175
                        String amplificationMarker = markerList.item(0).getTextContent();
176
                        DefinedTerm dnaMarker = null;
177
                        List<DefinedTermBase> markersFound = cdmAppController.getTermService().findByTitle(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
178
                        if(markersFound.size()==1){
179
                            dnaMarker = (DefinedTerm) markersFound.get(0);
180
                        }
181
                        else{
182
                            dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
183
                            cdmAppController.getTermService().saveOrUpdate(dnaMarker);
184
                        }
185
                        amplification.setDnaMarker(dnaMarker);
186
                    }
187

    
188
                    NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
189
                    NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
190
                    NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
191
                    NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
192
                    NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
193
                    NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
194
                    NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
195
                    NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
196
                    NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
197
                    NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
198

    
199
                    //consensus sequence
200
                    NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
201
                    if(sequencingsList.item(0)!=null && sequencingsList.item(0) instanceof Element){
202
                        parseAmplificationSequencings((Element)sequencingsList.item(0), amplification, amplificationResult, dnaSample, state);
203
                    }
204
                    parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
205
                }
206
            }
207
            //check if amplification already exists (can only be checked after all fields are initialized because comparison is done on the label cache))
208
            List<Amplification> matchingAmplifications = cdmAppController.getAmplificationService().findByLabelCache(amplification.getLabelCache(), MatchMode.EXACT, null, null, null, null, null).getRecords();
209
            if(matchingAmplifications.size()==1){
210
                amplification = matchingAmplifications.iterator().next();
211
            }
212
            cdmAppController.getAmplificationService().save(amplification);
213
            amplificationResult.setAmplification(amplification);
214
            dnaSample.addAmplificationResult(amplificationResult);
215
        }
216
    }
217

    
218
    private void parseAmplificationPrimers(NodeList elementsByTagName) {
219
        // TODO Auto-generated method stub
220

    
221
    }
222

    
223
    private void parseAmplificationSequencings(Element sequencings, Amplification amplification, AmplificationResult amplificationResult, DnaSample dnaSample, Abcd206ImportState state) {
224
        NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
225
        for(int i=0;i<sequencingList.getLength();i++){
226
            Sequence sequence = Sequence.NewInstance("");
227
            dnaSample.addSequence(sequence);
228
            sequence.setDnaMarker(amplification.getDnaMarker());
229

    
230
            if(sequencingList.item(i) instanceof Element){
231
                Element sequencing = (Element)sequencingList.item(i);
232

    
233
                //singleSequencings
234
                NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
235
                parseSingleSequencings(singleSequencingsList, amplification, amplificationResult, sequence);
236
                //Consensus sequence
237
                NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
238
                sequence.setConsensusSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(consensusSequencesList)));
239
                //sequence length
240
                Double consensusSequenceLength = AbcdParseUtility.parseFirstDouble(sequencing.getElementsByTagName(prefix+"consensusSequenceLength"), report);
241
                if(sequence.getConsensusSequence()!=null && consensusSequenceLength!=null){
242
                    //TODO: this can be different from the actual length in ABCD but not in CDM!
243
                    sequence.getConsensusSequence().setLength(consensusSequenceLength.intValue());
244
                }
245
                //contig file URL
246
                NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
247
                URI uri = AbcdParseUtility.parseFirstUri(consensusSequenceChromatogramFileURIList, report);
248
                Media contigFile = Media.NewInstance(uri, null, null, null);
249
                sequence.setContigFile(contigFile);
250

    
251
                //genetic Accession
252
                NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
253
                parseGeneticAccession(geneticAccessionList, sequence);
254

    
255
                //references
256
                NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
257
                if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
258
                    parseSequencingReferences((Element) referencesList.item(0), sequence);
259
                }
260
            }
261
        }
262
//        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
263
//        NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
264

    
265
    }
266

    
267
    private void parseSequencingReferences(Element references, Sequence sequence) {
268
        NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
269
        for(int i=0;i<referenceList.getLength();i++){
270
            if(referenceList.item(i) instanceof Element){
271
                Element element = (Element)referenceList.item(i);
272
                NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
273
                Reference reference = AbcdParseUtility.parseFirstReference(referenceCitationList, cdmAppController);
274
                sequence.addCitation(reference);
275
            }
276
        }
277
    }
278

    
279
    private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, AmplificationResult amplificationResult, Sequence sequence) {
280
        if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
281
            Element singleSequencings = (Element)singleSequencingsList.item(0);
282
            NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
283
            for(int i=0;i<singleSequencingList.getLength();i++){
284
                //single read
285
                SingleRead singleRead = SingleRead.NewInstance();
286
                SingleReadAlignment.NewInstance(sequence, singleRead);
287
                amplificationResult.addSingleRead(singleRead);
288
                if(singleSequencingList.item(i) instanceof Element){
289
                    Element singleSequencing = (Element)singleSequencingList.item(i);
290
                    NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
291
                    //read direction
292
                    String singleReadDirection = AbcdParseUtility.parseFirstTextContent(sequencingDirectionList);
293
                    if(singleReadDirection.equals(FORWARD)){
294
                        singleRead.setDirection(SequenceDirection.Forward);
295
                    }
296
                    else if(singleReadDirection.equals(REVERSE)){
297
                        singleRead.setDirection(SequenceDirection.Reverse);
298
                    }
299
                    //read pherogram URI
300
                    NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
301
                    singleRead.setPherogram(Media.NewInstance(AbcdParseUtility.parseFirstUri(chromatogramFileURIList, report), null, null, null));
302
                    NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
303
                    parseSequencingPrimers(sequencingPrimersList, singleRead, amplification);
304
                }
305
            }
306
        }
307
    }
308

    
309
    private void parseSequencingPrimers(NodeList sequencingPrimersList, SingleRead singleRead, Amplification amplification) {
310
        if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
311
            Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
312
            NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
313
            for(int i=0;i<sequencingPrimerList.getLength();i++){
314
                if(sequencingPrimerList.item(i) instanceof Element){
315
                    Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
316
                    //primer name
317
                    String primerName = AbcdParseUtility.parseFirstTextContent(sequencingPrimer.getElementsByTagName(prefix+"primerName"));
318
                    //check if primer already exists
319
                    List<Primer> matchingPrimers = cdmAppController.getPrimerService().findByLabel(primerName, MatchMode.EXACT, null, null, null, null, null).getRecords();
320
                    Primer primer = null;
321
                    if(matchingPrimers.size()==1){
322
                        primer = matchingPrimers.iterator().next();
323
                        return;
324
                    }
325
                    else{
326
                        primer = Primer.NewInstance(null);
327
                        primer.setLabel(primerName);
328
                    }
329
                    singleRead.setPrimer(primer);
330
                    //primer sequence
331
                    NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
332
                    primer.setSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(primerSequenceList)));
333
                    //primer direction
334
                    String direction = parseFirstAttribute("Direction", primerSequenceList);
335
                    if(direction!=null){
336
                        if(direction.equals(FORWARD)){
337
                            amplification.setForwardPrimer(primer);
338
                        }
339
                        else if(direction.equals(REVERSE)){
340
                            amplification.setReversePrimer(primer);
341
                        }
342
                    }
343
                    //reference citation
344
                    NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
345
                    String primerReferenceCitation = AbcdParseUtility.parseFirstTextContent(primerReferenceCitationList);
346
                    Reference reference = AbcdParseUtility.parseFirstReference(primerReferenceCitationList, cdmAppController);
347
                    primer.setPublishedIn(reference);
348

    
349
                    cdmAppController.getPrimerService().save(primer);
350
                }
351
            }
352
        }
353
    }
354

    
355
    private String parseFirstAttribute(String attributeName, NodeList nodeList) {
356
        String attribute = null;
357
        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
358
            Element element = (Element)nodeList.item(0);
359
            attribute = element.getAttribute(attributeName);
360
        }
361
        return attribute;
362
    }
363

    
364
    private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
365
        for(int i=0;i<geneticAccessionList.getLength();i++){
366
            if(geneticAccessionList.item(i) instanceof Element){
367
                //genetic accession number
368
                NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
369
                sequence.setGeneticAccessionNumber(AbcdParseUtility.parseFirstTextContent(geneticAccessionNumberList));
370

    
371
                //genetic accession number uri
372
                NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
373
                //TODO: this is different from the geneticAccessionNumber
374

    
375
            }
376
        }
377
    }
378

    
379
}
(2-2/2)