Project

General

Profile

Download (22.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2015 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.specimen.abcd206.in.molecular;
10

    
11
import eu.etaxonomy.cdm.common.URI;
12
import java.util.List;
13
import java.util.UUID;
14

    
15
import org.apache.logging.log4j.LogManager;import org.apache.logging.log4j.Logger;
16
import org.w3c.dom.Element;
17
import org.w3c.dom.Node;
18
import org.w3c.dom.NodeList;
19

    
20
import eu.etaxonomy.cdm.api.application.ICdmRepository;
21
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
22
import eu.etaxonomy.cdm.io.specimen.abcd206.in.AbcdParseUtility;
23
import eu.etaxonomy.cdm.io.specimen.abcd206.in.SpecimenImportReport;
24
import eu.etaxonomy.cdm.model.common.Annotation;
25
import eu.etaxonomy.cdm.model.common.Language;
26
import eu.etaxonomy.cdm.model.media.Media;
27
import eu.etaxonomy.cdm.model.molecular.Amplification;
28
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
29
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
30
import eu.etaxonomy.cdm.model.molecular.DnaSample;
31
import eu.etaxonomy.cdm.model.molecular.Primer;
32
import eu.etaxonomy.cdm.model.molecular.Sequence;
33
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
34
import eu.etaxonomy.cdm.model.molecular.SequenceString;
35
import eu.etaxonomy.cdm.model.molecular.SingleRead;
36
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
37
import eu.etaxonomy.cdm.model.reference.Reference;
38
import eu.etaxonomy.cdm.model.term.DefinedTerm;
39
import eu.etaxonomy.cdm.model.term.DefinedTermBase;
40
import eu.etaxonomy.cdm.model.term.OrderedTerm;
41
import eu.etaxonomy.cdm.model.term.TermType;
42
import eu.etaxonomy.cdm.model.term.TermVocabulary;
43
import eu.etaxonomy.cdm.persistence.query.MatchMode;
44

    
45
/**
46
 * @author pplitzner
47
 * @since Mar 4, 2015
48
 *
49
 */
50
public class AbcdGgbnParser {
51

    
52
    //DNA Quality term
53
    private static final String HIGH = "high";
54
    private static final String MEDIUM = "medium";
55
    private static final String LOW = "low";
56
    private static final UUID HIGH_QUALITY_TERM = UUID.fromString("ec443c76-5987-4ec5-a66b-da207f70b47f");
57
    private static final UUID MEDIUM_QUALITY_TERM = UUID.fromString("2a174892-1246-4807-9022-71ce8639346b");
58
    private static final UUID LOW_QUALITY_TERM = UUID.fromString("a3bf12ff-b041-425f-bdaa-aa51da65eebc");
59

    
60
    private static final String FORWARD = "forward";
61

    
62
    private static final String REVERSE = "reverse";
63

    
64
    private static final Logger logger = LogManager.getLogger(AbcdGgbnParser.class);
65

    
66
    private final String prefix = "ggbn:";
67

    
68
    private final SpecimenImportReport report;
69

    
70
    private final ICdmRepository cdmAppController;
71

    
72
    public AbcdGgbnParser(SpecimenImportReport report, ICdmRepository cdmAppController) {
73
        this.report = report;
74
        this.cdmAppController = cdmAppController;
75
    }
76

    
77
    public DnaSample parse(NodeList ggbn, DnaSample dnaSample, Abcd206ImportState state) {
78

    
79
        for(int i=0;i<ggbn.getLength();i++){
80
            Node item = ggbn.item(i);
81
            if(item instanceof Element){
82
                Element element = (Element) item;
83
                NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
84
                NodeList volumeList = element.getElementsByTagName(prefix+"volume");
85
                NodeList weightList = element.getElementsByTagName(prefix+"weight");
86
                NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
87
                NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
88
                NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
89
                NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
90
                NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
91
                NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
92
                NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
93

    
94
                dnaSample.setDnaQuality(parseDnaQuality(element, state));
95

    
96
                parseGelImage(gelImageList, state);
97
                parseAmplifications(amplificationsList, dnaSample, state);
98
            }
99
        }
100
        return dnaSample;
101
    }
102

    
103
    private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
104
        DnaQuality dnaQuality = DnaQuality.NewInstance();
105

    
106
        NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
107
        String purificationMethod = AbcdParseUtility.parseFirstTextContent(purificationMethodList);
108
        dnaQuality.setPurificationMethod(purificationMethod);
109

    
110
        NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
111
        if(concentrationList.getLength()==1){
112
            Node concentration = concentrationList.item(0);
113
            dnaQuality.setConcentration(AbcdParseUtility.parseDouble(concentration, report));
114
            if(concentration instanceof Element){
115
                String unit = ((Element) concentration).getAttribute("Unit");
116
                //TODO
117
//                dnaQuality.setConcentrationUnit(concentrationUnit)
118
            }
119
        }
120

    
121
        NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
122
        dnaQuality.setRatioOfAbsorbance260_280(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_280List, report));
123

    
124
        NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
125
        dnaQuality.setRatioOfAbsorbance260_230(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_230List, report));
126

    
127
        NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
128
        if(qualityCheckDateList.item(0)!=null){
129
            dnaQuality.setQualityCheckDate(AbcdParseUtility.parseFirstDateTime(qualityCheckDateList));
130
        }
131

    
132
        NodeList qualityList = element.getElementsByTagName(prefix+"quality");
133
        String quality = AbcdParseUtility.parseFirstTextContent(qualityList);
134
        if(LOW.equals(quality)){
135
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(LOW_QUALITY_TERM));
136
        }
137
        else if(MEDIUM.equals(quality)){
138
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(MEDIUM_QUALITY_TERM));
139
        }
140
        else if(HIGH.equals(quality)){
141
            dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(HIGH_QUALITY_TERM));
142
        }
143

    
144
        NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
145

    
146

    
147
        return dnaQuality;
148
    }
149

    
150
    private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
151
        if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
152
            Element gelImage = (Element)gelImageList.item(0);
153
            NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
154
            NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
155
            NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
156
            NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
157
            NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
158
            NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
159
            NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
160

    
161
        }
162

    
163
    }
164

    
165
    private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
166
        if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
167
            AmplificationResult amplificationResult = AmplificationResult.NewInstance();
168
            Amplification amplification = Amplification.NewInstance();
169
            NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
170
            for(int i=0;i<amplificationList.getLength();i++){
171
                if(amplificationList.item(i) instanceof Element){
172
                    Element amplificationElement = (Element)amplificationList.item(i);
173
                    NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
174
                    NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
175

    
176
                    //amplification dna marker
177
                    NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
178
                    if(markerList.item(0)!=null){
179
                        String amplificationMarker = markerList.item(0).getTextContent();
180
                        DefinedTerm dnaMarker = null;
181
                        List<DefinedTerm> markersFound = cdmAppController.getTermService().findByTitleWithRestrictions(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
182
                        if(markersFound.size()==1){
183
                            dnaMarker = markersFound.get(0);
184
                        }
185
                        else{
186
                            dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
187
                            List<TermVocabulary<DefinedTermBase>> vocs = cdmAppController.getVocabularyService().findByTermType(TermType.DnaMarker, null);
188
                            TermVocabulary<DefinedTermBase> voc = null;
189

    
190
                            if (vocs == null || vocs.size() == 0 ){
191
                                voc = TermVocabulary.NewInstance(TermType.DnaMarker);
192
                                voc.setLabel("Dna Marker");
193
                                cdmAppController.getVocabularyService().saveOrUpdate(voc);
194
                            }else{
195
                                voc = vocs.get(0);
196
                            }
197
                            voc.addTerm(dnaMarker);
198
                            cdmAppController.getTermService().saveOrUpdate(dnaMarker);
199
                        }
200
                        amplification.setDnaMarker(dnaMarker);
201
                    }
202

    
203
                    NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
204
                    NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
205
                    NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
206
                    NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
207
                    NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
208
                    NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
209
                    NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
210
                    NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
211
                    NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
212
                    NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
213

    
214
                    //consensus sequence
215
                    NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
216
                    if(sequencingsList.item(0)!=null) {
217
                        if ( sequencingsList.item(0) instanceof Element){
218
                            Element el = (Element)sequencingsList.item(0);
219
                            parseAmplificationSequencings(el, amplification, amplificationResult, dnaSample, state);
220
                        }
221
                    }
222

    
223

    
224
                    parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
225
                }
226
            }
227
            //check if amplification already exists (can only be checked after all fields are initialized because comparison is done on the label cache))
228
            List<Amplification> matchingAmplifications = cdmAppController.getAmplificationService().findByLabelCache(amplification.getLabelCache(), MatchMode.EXACT, null, null, null, null, null).getRecords();
229
            if(matchingAmplifications.size()==1){
230
                amplification = matchingAmplifications.iterator().next();
231
            }
232
            cdmAppController.getAmplificationService().save(amplification);
233
            amplificationResult.setAmplification(amplification);
234
            dnaSample.addAmplificationResult(amplificationResult);
235
        }
236
    }
237

    
238
    private void parseAmplificationPrimers(NodeList elementsByTagName) {
239
        // TODO Auto-generated method stub
240

    
241
    }
242

    
243
    private void parseAmplificationSequencings(Element sequencings, Amplification amplification, AmplificationResult amplificationResult, DnaSample dnaSample, Abcd206ImportState state) {
244
        NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
245
        for(int i=0;i<sequencingList.getLength();i++){
246
            Sequence sequence = Sequence.NewInstance("");
247
            dnaSample.addSequence(sequence);
248
            sequence.setDnaMarker(amplification.getDnaMarker());
249

    
250
            if(sequencingList.item(i) instanceof Element){
251
                Element sequencing = (Element)sequencingList.item(i);
252

    
253
                //singleSequencings
254
                NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
255
                parseSingleSequencings(singleSequencingsList, amplification, amplificationResult, sequence);
256
                //Consensus sequence
257
                NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
258
                sequence.setConsensusSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(consensusSequencesList)));
259
                //sequence length
260
                Double consensusSequenceLength = AbcdParseUtility.parseFirstDouble(sequencing.getElementsByTagName(prefix+"consensusSequenceLength"), report);
261
                if(sequence.getConsensusSequence()!=null && consensusSequenceLength!=null){
262
                    //TODO: this can be different from the actual length in ABCD but not in CDM!
263
                    sequence.getConsensusSequence().setLength(consensusSequenceLength.intValue());
264
                }
265
                //contig file URL geneticAccessionNumberURI
266
                NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
267
                //NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"geneticAccessionNumberURI");
268
                URI uri = AbcdParseUtility.parseFirstUri(consensusSequenceChromatogramFileURIList, report);
269
                if (uri != null && uri.toString().endsWith("fasta")){
270
                    state.putSequenceDataStableIdentifier(uri);
271
                    sequence.addAnnotation(Annotation.NewInstance(uri.toString(), Language.DEFAULT()));
272
                }else{
273
                    Media contigFile = Media.NewInstance(uri, null, null, null);
274
                    sequence.setContigFile(contigFile);
275
                }
276
                //genetic Accession
277
                NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
278
                parseGeneticAccession(geneticAccessionList, sequence);
279

    
280
                //references
281
                NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
282
                if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
283
                    parseSequencingReferences((Element) referencesList.item(0), sequence);
284
                }
285
            }
286
        }
287
//        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
288
//        NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
289

    
290
    }
291

    
292
    private void parseSequencingReferences(Element references, Sequence sequence) {
293
        NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
294
        for(int i=0;i<referenceList.getLength();i++){
295
            if(referenceList.item(i) instanceof Element){
296
                Element element = (Element)referenceList.item(i);
297
                NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
298
                Reference reference = AbcdParseUtility.parseFirstReference(referenceCitationList, cdmAppController);
299
                sequence.addCitation(reference);
300
            }
301
        }
302
    }
303

    
304
    private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, AmplificationResult amplificationResult, Sequence sequence) {
305
        if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
306
            Element singleSequencings = (Element)singleSequencingsList.item(0);
307
            NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
308
            for(int i=0;i<singleSequencingList.getLength();i++){
309
                //single read
310
                SingleRead singleRead = SingleRead.NewInstance();
311
                SingleReadAlignment.NewInstance(sequence, singleRead);
312
                amplificationResult.addSingleRead(singleRead);
313
                if(singleSequencingList.item(i) instanceof Element){
314
                    Element singleSequencing = (Element)singleSequencingList.item(i);
315
                    NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
316
                    //read direction
317
                    String singleReadDirection = AbcdParseUtility.parseFirstTextContent(sequencingDirectionList);
318

    
319
                    if(singleReadDirection != null && singleReadDirection.equals(FORWARD)){
320
                        singleRead.setDirection(SequenceDirection.Forward);
321
                    }
322
                    else if(singleReadDirection != null && singleReadDirection.equals(REVERSE)){
323
                        singleRead.setDirection(SequenceDirection.Reverse);
324
                    }
325
                    //read pherogram URI
326
                    NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
327
                    singleRead.setPherogram(Media.NewInstance(AbcdParseUtility.parseFirstUri(chromatogramFileURIList, report), null, null, null));
328
                    NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
329
                    parseSequencingPrimers(sequencingPrimersList, singleRead, amplification);
330
                }
331
            }
332
        }
333
    }
334

    
335
    private void parseSequencingPrimers(NodeList sequencingPrimersList, SingleRead singleRead, Amplification amplification) {
336
        if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
337
            Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
338
            NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
339
            for(int i=0;i<sequencingPrimerList.getLength();i++){
340
                if(sequencingPrimerList.item(i) instanceof Element){
341
                    Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
342
                    //primer name
343
                    String primerName = AbcdParseUtility.parseFirstTextContent(sequencingPrimer.getElementsByTagName(prefix+"primerName"));
344
                    //check if primer already exists
345
                    List<Primer> matchingPrimers = cdmAppController.getPrimerService().findByLabel(primerName, MatchMode.EXACT, null, null, null, null, null).getRecords();
346
                    Primer primer = null;
347
                    if(matchingPrimers.size()==1){
348
                        primer = matchingPrimers.iterator().next();
349
                        return;
350
                    }
351
                    else{
352
                        primer = Primer.NewInstance(null);
353
                        primer.setLabel(primerName);
354
                    }
355
                    singleRead.setPrimer(primer);
356
                    //primer sequence
357
                    NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
358
                    primer.setSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(primerSequenceList)));
359
                    //primer direction
360
                    String direction = parseFirstAttribute("Direction", primerSequenceList);
361
                    if(direction!=null){
362
                        if(direction.equals(FORWARD)){
363
                            amplification.setForwardPrimer(primer);
364
                        }
365
                        else if(direction.equals(REVERSE)){
366
                            amplification.setReversePrimer(primer);
367
                        }
368
                    }
369
                    //reference citation
370
                    NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
371
                    String primerReferenceCitation = AbcdParseUtility.parseFirstTextContent(primerReferenceCitationList);
372
                    Reference reference = AbcdParseUtility.parseFirstReference(primerReferenceCitationList, cdmAppController);
373
                    primer.setPublishedIn(reference);
374

    
375
                    cdmAppController.getPrimerService().save(primer);
376
                }
377
            }
378
        }
379
    }
380

    
381
    private String parseFirstAttribute(String attributeName, NodeList nodeList) {
382
        String attribute = null;
383
        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
384
            Element element = (Element)nodeList.item(0);
385
            attribute = element.getAttribute(attributeName);
386
        }
387
        return attribute;
388
    }
389

    
390
    private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
391
        for(int i=0;i<geneticAccessionList.getLength();i++){
392
            if(geneticAccessionList.item(i) instanceof Element){
393
                //genetic accession number
394
                NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
395
                sequence.setGeneticAccessionNumber(AbcdParseUtility.parseFirstTextContent(geneticAccessionNumberList));
396

    
397
                //genetic accession number uri
398
                NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
399
                //TODO: this is different from the geneticAccessionNumber
400

    
401
            }
402
        }
403
    }
404

    
405
}
(2-2/2)