Project

General

Profile

Download (21.2 KB) Statistics
| Branch: | Tag: | Revision:
// $Id$
/**
* Copyright (C) 2015 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
package eu.etaxonomy.cdm.io.specimen.abcd206.in.ggbn;
11

    
12
import java.net.URI;
13
import java.util.List;
14

    
15
import org.apache.log4j.Logger;
16
import org.joda.time.DateTime;
17
import org.w3c.dom.Element;
18
import org.w3c.dom.Node;
19
import org.w3c.dom.NodeList;
20

    
21
import eu.etaxonomy.cdm.api.application.ICdmApplicationConfiguration;
22
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportReport;
23
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
24
import eu.etaxonomy.cdm.model.common.DefinedTerm;
25
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
26
import eu.etaxonomy.cdm.model.media.Media;
27
import eu.etaxonomy.cdm.model.molecular.Amplification;
28
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
29
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
30
import eu.etaxonomy.cdm.model.molecular.DnaSample;
31
import eu.etaxonomy.cdm.model.molecular.Primer;
32
import eu.etaxonomy.cdm.model.molecular.Sequence;
33
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
34
import eu.etaxonomy.cdm.model.molecular.SequenceString;
35
import eu.etaxonomy.cdm.model.molecular.SingleRead;
36
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
37
import eu.etaxonomy.cdm.model.reference.Reference;
38
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39
import eu.etaxonomy.cdm.persistence.query.MatchMode;
40

    
41
/**
42
 * @author pplitzner
43
 * @date Mar 4, 2015
44
 *
45
 */
46
public class AbcdGgbnParser {
47

    
48
    private static final String FORWARD = "forward";
49

    
50
    private static final String REVERSE = "reverse";
51

    
52
    private static final Logger logger = Logger.getLogger(AbcdGgbnParser.class);
53

    
54
    private final String prefix = "ggbn:";
55

    
56
    private final Abcd206ImportReport report;
57

    
58
    private final ICdmApplicationConfiguration cdmAppController;
59

    
60
    public AbcdGgbnParser(Abcd206ImportReport report, ICdmApplicationConfiguration cdmAppController) {
61
        this.report = report;
62
        this.cdmAppController = cdmAppController;
63
    }
64

    
65
    public DnaSample parse(NodeList ggbn, Abcd206ImportState state) {
66
        DnaSample dnaSample = DnaSample.NewInstance();
67

    
68
        for(int i=0;i<ggbn.getLength();i++){
69
            Node item = ggbn.item(i);
70
            if(item instanceof Element){
71
                Element element = (Element) item;
72
                NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
73
                NodeList volumeList = element.getElementsByTagName(prefix+"volume");
74
                NodeList weightList = element.getElementsByTagName(prefix+"weight");
75
                NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
76
                NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
77
                NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
78
                NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
79
                NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
80
                NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
81
                NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
82

    
83
//                dnaSample.setDnaQuality(parseDnaQuality(element, state));
84

    
85
                parseGelImage(gelImageList, state);
86
                parseAmplifications(amplificationsList, dnaSample, state);
87
            }
88
        }
89
        return dnaSample;
90
    }
91

    
92
    private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
93
        DnaQuality dnaQuality = DnaQuality.NewInstance();
94

    
95
        NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
96
//        dnaQuality.setPurificationMethod(purificationMethod)
97

    
98
        NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
99
        if(concentrationList.getLength()==1){
100
            Node concentration = concentrationList.item(0);
101
            dnaQuality.setConcentration(parseDouble(concentration));
102
            if(concentration instanceof Element){
103
                String unit = ((Element) concentration).getAttribute("Unit");
104
//                dnaQuality.setConcentrationUnit(concentrationUnit)
105
            }
106
        }
107

    
108
        NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
109
        dnaQuality.setRatioOfAbsorbance260_280(parseFirstNodeDouble(ratioOfAbsorbance260_280List));
110

    
111
        NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
112
        dnaQuality.setRatioOfAbsorbance260_230(parseFirstNodeDouble(ratioOfAbsorbance260_230List));
113

    
114
        NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
115
        if(qualityCheckDateList.item(0)!=null){
116
            dnaQuality.setQualityCheckDate(DateTime.parse(qualityCheckDateList.item(0).getTextContent()));
117
        }
118

    
119
        NodeList qualityList = element.getElementsByTagName(prefix+"quality");
120
        NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
121

    
122
//        dnaQuality.setQualityTerm(qualityTerm)
123

    
124
        return dnaQuality;
125
    }
126

    
127
    private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
128
        if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
129
            Element gelImage = (Element)gelImageList.item(0);
130
            NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
131
            NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
132
            NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
133
            NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
134
            NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
135
            NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
136
            NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
137

    
138
        }
139

    
140
    }
141

    
142
    private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
143
        if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
144
            AmplificationResult amplificationResult = AmplificationResult.NewInstance();
145
            Amplification amplification = Amplification.NewInstance();
146
            NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
147
            for(int i=0;i<amplificationList.getLength();i++){
148
                if(amplificationList.item(i) instanceof Element){
149
                    Element amplificationElement = (Element)amplificationList.item(i);
150
                    NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
151
                    NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
152

    
153
                    NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
154
                    if(markerList.item(0)!=null){
155
                        String amplificationMarker = markerList.item(0).getTextContent();
156
                        DefinedTerm dnaMarker = null;
157
                        List<DefinedTermBase> markersFound = cdmAppController.getTermService().findByTitle(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
158
                        if(markersFound.size()==1){
159
                            dnaMarker = (DefinedTerm) markersFound.get(0);
160
                        }
161
                        else{
162
                            dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
163
                            cdmAppController.getTermService().saveOrUpdate(dnaMarker);
164
                        }
165
                        amplification.setDnaMarker(dnaMarker);
166
                    }
167

    
168
                    NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
169
                    NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
170
                    NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
171
                    NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
172
                    NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
173
                    NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
174
                    NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
175
                    NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
176
                    NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
177
                    NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
178

    
179
                    NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
180
                    if(sequencingsList.item(0)!=null && sequencingsList.item(0) instanceof Element){
181
                        parseAmplificationSequencings((Element)sequencingsList.item(0), amplification, dnaSample, state);
182
                    }
183
                    parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
184
                }
185
            }
186
            amplificationResult.setAmplification(amplification);
187
            dnaSample.addAmplificationResult(amplificationResult);
188
        }
189
    }
190

    
191
    private void parseAmplificationPrimers(NodeList elementsByTagName) {
192
        // TODO Auto-generated method stub
193

    
194
    }
195

    
196
    private void parseAmplificationSequencings(Element sequencings, Amplification amplification, DnaSample dnaSample, Abcd206ImportState state) {
197
        NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
198
        for(int i=0;i<sequencingList.getLength();i++){
199
            Sequence sequence = Sequence.NewInstance("");
200
            dnaSample.addSequence(sequence);
201

    
202
            if(sequencingList.item(i) instanceof Element){
203
                Element sequencing = (Element)sequencingList.item(i);
204

    
205
                //singleSequencings
206
                NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
207
                parseSingleSequencings(singleSequencingsList, amplification, sequence);
208
                //Consensus sequence
209
                NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
210
                sequence.setConsensusSequence(SequenceString.NewInstance(parseFirstTextContent(consensusSequencesList)));
211
                //sequence length
212
                NodeList consensusSequencesLengthList = sequencing.getElementsByTagName(prefix+"consensusSequenceLength");
213
                if(sequence.getConsensusSequence()!=null){
214
                    //TODO: this can be different from the actual length in ABCD but not in CDM!
215
                    sequence.getConsensusSequence().setLength(parseFirstNodeDouble(consensusSequencesLengthList).intValue());
216
                }
217
                //contig file URL
218
                NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
219
                URI uri = parseFirstUri(consensusSequenceChromatogramFileURIList);
220
                Media contigFile = Media.NewInstance(uri, null, null, null);
221
                sequence.setContigFile(contigFile);
222

    
223
                //genetic Accession
224
                NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
225
                parseGeneticAccession(geneticAccessionList, sequence);
226

    
227
                //references
228
                NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
229
                if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
230
                    parseSequencingReferences((Element) referencesList.item(0), sequence);
231
                }
232
            }
233
        }
234
//        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
235
//        NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
236

    
237
    }
238

    
239
    private void parseSequencingReferences(Element references, Sequence sequence) {
240
        NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
241
        for(int i=0;i<referenceList.getLength();i++){
242
            if(referenceList.item(i) instanceof Element){
243
                Element element = (Element)referenceList.item(i);
244
                NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
245
                String referenceCitation = parseFirstTextContent(referenceCitationList);
246
                List<Reference> matchedReferences = cdmAppController.getReferenceService().findByTitle(Reference.class, referenceCitation, MatchMode.EXACT, null, null, null, null, null).getRecords();
247
                Reference<?> reference;
248
                if(matchedReferences.size()==1){
249
                    reference = matchedReferences.iterator().next();
250
                }
251
                else{
252
                    reference = ReferenceFactory.newGeneric();
253
                    reference.setTitle(referenceCitation);
254
                    cdmAppController.getReferenceService().saveOrUpdate(reference);
255
                }
256
                sequence.addCitation(reference);
257
            }
258
        }
259

    
260
    }
261

    
262
    private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, Sequence sequence) {
263
        if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
264
            Element singleSequencings = (Element)singleSequencingsList.item(0);
265
            NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
266
            for(int i=0;i<singleSequencingList.getLength();i++){
267
                //single read
268
                SingleRead singleRead = SingleRead.NewInstance();
269
                SingleReadAlignment.NewInstance(sequence, singleRead);
270
                if(singleSequencingList.item(i) instanceof Element){
271
                    Element singleSequencing = (Element)singleSequencingList.item(i);
272
                    NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
273
                    //read direction
274
                    String singleReadDirection = parseFirstTextContent(sequencingDirectionList);
275
                    if(singleReadDirection.equals(FORWARD)){
276
                        singleRead.setDirection(SequenceDirection.Forward);
277
                    }
278
                    else if(singleReadDirection.equals(REVERSE)){
279
                        singleRead.setDirection(SequenceDirection.Reverse);
280
                    }
281
                    //read pherogram URI
282
                    NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
283
                    singleRead.setPherogram(Media.NewInstance(parseFirstUri(chromatogramFileURIList), null, null, null));
284
                    NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
285
                    parseSequencingPrimers(sequencingPrimersList, amplification);
286
                }
287
            }
288
        }
289
    }
290

    
291
    /**
292
     * @param sequencingPrimersList
293
     * @param amplification
294
     */
295
    private void parseSequencingPrimers(NodeList sequencingPrimersList, Amplification amplification) {
296
        if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
297
            Primer primer = Primer.NewInstance(null);
298
            Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
299
            NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
300
            for(int i=0;i<sequencingPrimerList.getLength();i++){
301
                if(sequencingPrimerList.item(i) instanceof Element){
302
                    Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
303
                    //primer sequence
304
                    NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
305
                    primer.setSequence(SequenceString.NewInstance(parseFirstTextContent(primerSequenceList)));
306
                    //primer direction
307
                    String direction = parseFirstAttribute("Direction", primerSequenceList);
308
                    if(direction!=null){
309
                        if(direction.equals(FORWARD)){
310
                            amplification.setForwardPrimer(primer);
311
                        }
312
                        else if(direction.equals(REVERSE)){
313
                            amplification.setReversePrimer(primer);
314
                        }
315
                    }
316
                    //primer name
317
                    NodeList primerNameList = sequencingPrimer.getElementsByTagName(prefix+"primerName");
318
                    primer.setLabel(parseFirstTextContent(primerNameList));
319
                    //reference citation
320
                    NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
321
                    String primerReferenceCitation = parseFirstTextContent(primerReferenceCitationList);
322
                    List<Reference> matchingReferences = cdmAppController.getReferenceService().findByTitle(Reference.class, primerReferenceCitation, MatchMode.EXACT, null, null, null, null, null).getRecords();
323
                    Reference<?> primerReference;
324
                    if(matchingReferences.size()==1){
325
                        primerReference = matchingReferences.iterator().next();
326
                    }
327
                    else{
328
                        primerReference = ReferenceFactory.newGeneric();
329
                        primerReference.setTitle(primerReferenceCitation);
330
                        cdmAppController.getReferenceService().saveOrUpdate(primerReference);
331
                    }
332
                    primer.setPublishedIn(primerReference);
333
                }
334
            }
335
        }
336
    }
337

    
338
    private String parseFirstAttribute(String attributeName, NodeList nodeList) {
339
        String attribute = null;
340
        if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
341
            Element element = (Element)nodeList.item(0);
342
            attribute = element.getAttribute(attributeName);
343
        }
344
        return attribute;
345
    }
346

    
347
    private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
348
        for(int i=0;i<geneticAccessionList.getLength();i++){
349
            if(geneticAccessionList.item(i) instanceof Element){
350
                //genetic accession number
351
                NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
352
                sequence.setGeneticAccessionNumber(parseFirstTextContent(geneticAccessionNumberList));
353

    
354
                //genetic accession number uri
355
                NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
356
                //TODO: this is different from the geneticAccessionNumber
357

    
358
            }
359
        }
360
    }
361

    
362
    private URI parseFirstUri(NodeList nodeList){
363
        URI uri = null;
364
        if(nodeList.item(0)!=null){
365
            String textContent = nodeList.item(0).getTextContent();
366
            if(textContent!=null){
367
                try {
368
                    uri = URI.create(textContent);
369
                } catch (IllegalArgumentException e) {
370
                    //nothing
371
                }
372
            }
373
        }
374
        return uri;
375
    }
376

    
377
    private String parseFirstTextContent(NodeList nodeList){
378
        String string = null;
379
        if(nodeList.getLength()>0){
380
            string = nodeList.item(0).getTextContent().replace("\n", "").replaceAll("( )+", " ").trim();
381
        }
382
        return string;
383
    }
384

    
385
    private Double parseFirstNodeDouble(NodeList nodeList){
386
        if(nodeList.getLength()>0){
387
            return parseDouble(nodeList.item(0));
388
        }
389
        return null;
390
    }
391

    
392
    private Double parseDouble(Node node){
393
        String message = "Could not parse double value for node " + node.getNodeName();
394
        Double doubleValue = null;
395
        try{
396
            String textContent = node.getTextContent();
397
            //remove 1000 dots
398
            textContent = textContent.replace(".","");
399
            //convert commmas
400
            textContent = textContent.replace(",",".");
401
            doubleValue = Double.parseDouble(textContent);
402
        } catch (NullPointerException npe){
403
            logger.error(message, npe);
404
        } catch (NumberFormatException nfe){
405
            logger.error(message, nfe);
406
        }
407
        return doubleValue;
408
    }
409

    
410
}
    (1-1/1)