Project

General

Profile

Download (21.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2015 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.specimen.abcd206.in.molecular;
10

    
11
import java.net.URI;
12
import java.util.List;
13
import java.util.UUID;
14

    
15
import org.apache.log4j.Logger;
16
import org.w3c.dom.Element;
17
import org.w3c.dom.Node;
18
import org.w3c.dom.NodeList;
19

    
20
import eu.etaxonomy.cdm.api.application.ICdmRepository;
21
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
22
import eu.etaxonomy.cdm.io.specimen.abcd206.in.AbcdParseUtility;
23
import eu.etaxonomy.cdm.io.specimen.abcd206.in.SpecimenImportReport;
24
import eu.etaxonomy.cdm.model.media.Media;
25
import eu.etaxonomy.cdm.model.molecular.Amplification;
26
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
27
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
28
import eu.etaxonomy.cdm.model.molecular.DnaSample;
29
import eu.etaxonomy.cdm.model.molecular.Primer;
30
import eu.etaxonomy.cdm.model.molecular.Sequence;
31
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
32
import eu.etaxonomy.cdm.model.molecular.SequenceString;
33
import eu.etaxonomy.cdm.model.molecular.SingleRead;
34
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.term.DefinedTerm;
37
import eu.etaxonomy.cdm.model.term.OrderedTerm;
38
import eu.etaxonomy.cdm.persistence.query.MatchMode;
39

    
40
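/**
 * Parser for the <code>ggbn:</code> prefixed elements of an ABCD 2.06 import. It fills a
 * {@link DnaSample} with DNA quality, amplification, sequence, single read and primer data.
 *
 * @author pplitzner
 * @since Mar 4, 2015
 */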
45
public class AbcdGgbnParser {
46

    
47
    //DNA Quality term
48
    private static final String HIGH = "high";
49
    private static final String MEDIUM = "medium";
50
    private static final String LOW = "low";
51
    private static final UUID HIGH_QUALITY_TERM = UUID.fromString("ec443c76-5987-4ec5-a66b-da207f70b47f");
52
    private static final UUID MEDIUM_QUALITY_TERM = UUID.fromString("2a174892-1246-4807-9022-71ce8639346b");
53
    private static final UUID LOW_QUALITY_TERM = UUID.fromString("a3bf12ff-b041-425f-bdaa-aa51da65eebc");
54

    
55
    private static final String FORWARD = "forward";
56

    
57
    private static final String REVERSE = "reverse";
58

    
59
    private static final Logger logger = Logger.getLogger(AbcdGgbnParser.class);
60

    
61
    private final String prefix = "ggbn:";
62

    
63
    private final SpecimenImportReport report;
64

    
65
    private final ICdmRepository cdmAppController;
66

    
67
    /**
     * Creates a parser that works with the given report and repository.
     *
     * @param report the import report handed to the number and URI parsing utilities
     * @param cdmAppController the repository whose services are used to look up and store
     *            terms, amplifications, primers and references
     */
    public AbcdGgbnParser(SpecimenImportReport report, ICdmRepository cdmAppController) {
        this.cdmAppController = cdmAppController;
        this.report = report;
    }
71

    
72
    public DnaSample parse(NodeList ggbn, DnaSample dnaSample, Abcd206ImportState state) {
73

    
74
        for(int i=0;i<ggbn.getLength();i++){
75
            Node item = ggbn.item(i);
76
            if(item instanceof Element){
77
                Element element = (Element) item;
78
                NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
79
                NodeList volumeList = element.getElementsByTagName(prefix+"volume");
80
                NodeList weightList = element.getElementsByTagName(prefix+"weight");
81
                NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
82
                NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
83
                NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
84
                NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
85
                NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
86
                NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
87
                NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
88

    
89
                dnaSample.setDnaQuality(parseDnaQuality(element, state));
90

    
91
                parseGelImage(gelImageList, state);
92
                parseAmplifications(amplificationsList, dnaSample, state);
93
            }
94
        }
95
        return dnaSample;
96
    }
97

    
98
    private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
99
        DnaQuality dnaQuality = DnaQuality.NewInstance();
100

    
101
        NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
102
        String purificationMethod = AbcdParseUtility.parseFirstTextContent(purificationMethodList);
103
        dnaQuality.setPurificationMethod(purificationMethod);
104

    
105
        NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
106
        if(concentrationList.getLength()==1){
107
            Node concentration = concentrationList.item(0);
108
            dnaQuality.setConcentration(AbcdParseUtility.parseDouble(concentration, report));
109
            if(concentration instanceof Element){
110
                String unit = ((Element) concentration).getAttribute("Unit");
111
                //TODO
112
//                dnaQuality.setConcentrationUnit(concentrationUnit)
113
            }
114
        }
115

    
116
        NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
117
        dnaQuality.setRatioOfAbsorbance260_280(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_280List, report));
118

    
119
        NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
120
        dnaQuality.setRatioOfAbsorbance260_230(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_230List, report));
121

    
122
        NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
123
        if(qualityCheckDateList.item(0)!=null){
124
            dnaQuality.setQualityCheckDate(AbcdParseUtility.parseFirstDateTime(qualityCheckDateList));
125
        }
126

    
127
        NodeList qualityList = element.getElementsByTagName(prefix+"quality");
128
        String quality = AbcdParseUtility.parseFirstTextContent(qualityList);
129
        if(LOW.equals(quality)){
130
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(LOW_QUALITY_TERM));
131
        }
132
        else if(MEDIUM.equals(quality)){
133
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(MEDIUM_QUALITY_TERM));
134
        }
135
        else if(HIGH.equals(quality)){
136
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(HIGH_QUALITY_TERM));
137
        }
138

    
139
        NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
140

    
141

    
142
        return dnaQuality;
143
    }
144

    
145
    private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
146
        if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
147
            Element gelImage = (Element)gelImageList.item(0);
148
            NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
149
            NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
150
            NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
151
            NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
152
            NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
153
            NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
154
            NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
155

    
156
        }
157

    
158
    }
159

    
160
    private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
161
        if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
162
            AmplificationResult amplificationResult = AmplificationResult.NewInstance();
163
            Amplification amplification = Amplification.NewInstance();
164
            NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
165
            for(int i=0;i<amplificationList.getLength();i++){
166
                if(amplificationList.item(i) instanceof Element){
167
                    Element amplificationElement = (Element)amplificationList.item(i);
168
                    NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
169
                    NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
170

    
171
                    //amplification dna marker
172
                    NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
173
                    if(markerList.item(0)!=null){
174
                        String amplificationMarker = markerList.item(0).getTextContent();
175
                        DefinedTerm dnaMarker = null;
176
                        List<DefinedTerm> markersFound = cdmAppController.getTermService().findByTitleWithRestrictions(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
177
                        if(markersFound.size()==1){
178
                            dnaMarker = markersFound.get(0);
179
                        }
180
                        else{
181
                            dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
182
                            cdmAppController.getTermService().saveOrUpdate(dnaMarker);
183
                        }
184
                        amplification.setDnaMarker(dnaMarker);
185
                    }
186

    
187
                    NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
188
                    NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
189
                    NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
190
                    NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
191
                    NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
192
                    NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
193
                    NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
194
                    NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
195
                    NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
196
                    NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
197

    
198
                    //consensus sequence
199
                    NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
200
                    if(sequencingsList.item(0)!=null) {
201
                        if ( sequencingsList.item(0) instanceof Element){
202
                            Element el = (Element)sequencingsList.item(0);
203
                            parseAmplificationSequencings(el, amplification, amplificationResult, dnaSample, state);
204
                        }
205
                    }
206

    
207

    
208
                    parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
209
                }
210
            }
211
            //check if amplification already exists (can only be checked after all fields are initialized because comparison is done on the label cache))
212
            List<Amplification> matchingAmplifications = cdmAppController.getAmplificationService().findByLabelCache(amplification.getLabelCache(), MatchMode.EXACT, null, null, null, null, null).getRecords();
213
            if(matchingAmplifications.size()==1){
214
                amplification = matchingAmplifications.iterator().next();
215
            }
216
            cdmAppController.getAmplificationService().save(amplification);
217
            amplificationResult.setAmplification(amplification);
218
            dnaSample.addAmplificationResult(amplificationResult);
219
        }
220
    }
221

    
222
    private void parseAmplificationPrimers(NodeList elementsByTagName) {
223
        // TODO Auto-generated method stub
224

    
225
    }
226

    
227
    private void parseAmplificationSequencings(Element sequencings, Amplification amplification, AmplificationResult amplificationResult, DnaSample dnaSample, Abcd206ImportState state) {
228
        NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
229
        for(int i=0;i<sequencingList.getLength();i++){
230
            Sequence sequence = Sequence.NewInstance("");
231
            dnaSample.addSequence(sequence);
232
            sequence.setDnaMarker(amplification.getDnaMarker());
233

    
234
            if(sequencingList.item(i) instanceof Element){
235
                Element sequencing = (Element)sequencingList.item(i);
236

    
237
                //singleSequencings
238
                NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
239
                parseSingleSequencings(singleSequencingsList, amplification, amplificationResult, sequence);
240
                //Consensus sequence
241
                NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
242
                sequence.setConsensusSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(consensusSequencesList)));
243
                //sequence length
244
                Double consensusSequenceLength = AbcdParseUtility.parseFirstDouble(sequencing.getElementsByTagName(prefix+"consensusSequenceLength"), report);
245
                if(sequence.getConsensusSequence()!=null && consensusSequenceLength!=null){
246
                    //TODO: this can be different from the actual length in ABCD but not in CDM!
247
                    sequence.getConsensusSequence().setLength(consensusSequenceLength.intValue());
248
                }
249
                //contig file URL
250
                NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
251
                URI uri = AbcdParseUtility.parseFirstUri(consensusSequenceChromatogramFileURIList, report);
252
                if (uri != null && uri.toString().endsWith("fasta")){
253
                    state.putSequenceDataStableIdentifier(uri);
254
                }else{
255
                    Media contigFile = Media.NewInstance(uri, null, null, null);
256
                    sequence.setContigFile(contigFile);
257
                }
258
                //genetic Accession
259
                NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
260
                parseGeneticAccession(geneticAccessionList, sequence);
261

    
262
                //references
263
                NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
264
                if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
265
                    parseSequencingReferences((Element) referencesList.item(0), sequence);
266
                }
267
            }
268
        }
269
//        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
270
//        NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
271

    
272
    }
273

    
274
    private void parseSequencingReferences(Element references, Sequence sequence) {
275
        NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
276
        for(int i=0;i<referenceList.getLength();i++){
277
            if(referenceList.item(i) instanceof Element){
278
                Element element = (Element)referenceList.item(i);
279
                NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
280
                Reference reference = AbcdParseUtility.parseFirstReference(referenceCitationList, cdmAppController);
281
                sequence.addCitation(reference);
282
            }
283
        }
284
    }
285

    
286
    private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, AmplificationResult amplificationResult, Sequence sequence) {
287
        if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
288
            Element singleSequencings = (Element)singleSequencingsList.item(0);
289
            NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
290
            for(int i=0;i<singleSequencingList.getLength();i++){
291
                //single read
292
                SingleRead singleRead = SingleRead.NewInstance();
293
                SingleReadAlignment.NewInstance(sequence, singleRead);
294
                amplificationResult.addSingleRead(singleRead);
295
                if(singleSequencingList.item(i) instanceof Element){
296
                    Element singleSequencing = (Element)singleSequencingList.item(i);
297
                    NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
298
                    //read direction
299
                    String singleReadDirection = AbcdParseUtility.parseFirstTextContent(sequencingDirectionList);
300
                    if(singleReadDirection.equals(FORWARD)){
301
                        singleRead.setDirection(SequenceDirection.Forward);
302
                    }
303
                    else if(singleReadDirection.equals(REVERSE)){
304
                        singleRead.setDirection(SequenceDirection.Reverse);
305
                    }
306
                    //read pherogram URI
307
                    NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
308
                    singleRead.setPherogram(Media.NewInstance(AbcdParseUtility.parseFirstUri(chromatogramFileURIList, report), null, null, null));
309
                    NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
310
                    parseSequencingPrimers(sequencingPrimersList, singleRead, amplification);
311
                }
312
            }
313
        }
314
    }
315

    
316
    private void parseSequencingPrimers(NodeList sequencingPrimersList, SingleRead singleRead, Amplification amplification) {
317
        if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
318
            Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
319
            NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
320
            for(int i=0;i<sequencingPrimerList.getLength();i++){
321
                if(sequencingPrimerList.item(i) instanceof Element){
322
                    Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
323
                    //primer name
324
                    String primerName = AbcdParseUtility.parseFirstTextContent(sequencingPrimer.getElementsByTagName(prefix+"primerName"));
325
                    //check if primer already exists
326
                    List<Primer> matchingPrimers = cdmAppController.getPrimerService().findByLabel(primerName, MatchMode.EXACT, null, null, null, null, null).getRecords();
327
                    Primer primer = null;
328
                    if(matchingPrimers.size()==1){
329
                        primer = matchingPrimers.iterator().next();
330
                        return;
331
                    }
332
                    else{
333
                        primer = Primer.NewInstance(null);
334
                        primer.setLabel(primerName);
335
                    }
336
                    singleRead.setPrimer(primer);
337
                    //primer sequence
338
                    NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
339
                    primer.setSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(primerSequenceList)));
340
                    //primer direction
341
                    String direction = parseFirstAttribute("Direction", primerSequenceList);
342
                    if(direction!=null){
343
                        if(direction.equals(FORWARD)){
344
                            amplification.setForwardPrimer(primer);
345
                        }
346
                        else if(direction.equals(REVERSE)){
347
                            amplification.setReversePrimer(primer);
348
                        }
349
                    }
350
                    //reference citation
351
                    NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
352
                    String primerReferenceCitation = AbcdParseUtility.parseFirstTextContent(primerReferenceCitationList);
353
                    Reference reference = AbcdParseUtility.parseFirstReference(primerReferenceCitationList, cdmAppController);
354
                    primer.setPublishedIn(reference);
355

    
356
                    cdmAppController.getPrimerService().save(primer);
357
                }
358
            }
359
        }
360
    }
361

    
362
    private String parseFirstAttribute(String attributeName, NodeList nodeList) {
363
        String attribute = null;
364
        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
365
            Element element = (Element)nodeList.item(0);
366
            attribute = element.getAttribute(attributeName);
367
        }
368
        return attribute;
369
    }
370

    
371
    private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
372
        for(int i=0;i<geneticAccessionList.getLength();i++){
373
            if(geneticAccessionList.item(i) instanceof Element){
374
                //genetic accession number
375
                NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
376
                sequence.setGeneticAccessionNumber(AbcdParseUtility.parseFirstTextContent(geneticAccessionNumberList));
377

    
378
                //genetic accession number uri
379
                NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
380
                //TODO: this is different from the geneticAccessionNumber
381

    
382
            }
383
        }
384
    }
385

    
386
}
(2-2/2)