1
|
/**
|
2
|
* Copyright (C) 2015 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.specimen.abcd206.in.molecular;
|
10
|
|
11
|
import java.net.URI;
|
12
|
import java.util.List;
|
13
|
import java.util.UUID;
|
14
|
|
15
|
import org.apache.log4j.Logger;
|
16
|
import org.w3c.dom.Element;
|
17
|
import org.w3c.dom.Node;
|
18
|
import org.w3c.dom.NodeList;
|
19
|
|
20
|
import eu.etaxonomy.cdm.api.application.ICdmRepository;
|
21
|
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
|
22
|
import eu.etaxonomy.cdm.io.specimen.abcd206.in.AbcdParseUtility;
|
23
|
import eu.etaxonomy.cdm.io.specimen.abcd206.in.SpecimenImportReport;
|
24
|
import eu.etaxonomy.cdm.model.media.Media;
|
25
|
import eu.etaxonomy.cdm.model.molecular.Amplification;
|
26
|
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
|
27
|
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
|
28
|
import eu.etaxonomy.cdm.model.molecular.DnaSample;
|
29
|
import eu.etaxonomy.cdm.model.molecular.Primer;
|
30
|
import eu.etaxonomy.cdm.model.molecular.Sequence;
|
31
|
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
|
32
|
import eu.etaxonomy.cdm.model.molecular.SequenceString;
|
33
|
import eu.etaxonomy.cdm.model.molecular.SingleRead;
|
34
|
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
|
35
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
36
|
import eu.etaxonomy.cdm.model.term.DefinedTerm;
|
37
|
import eu.etaxonomy.cdm.model.term.OrderedTerm;
|
38
|
import eu.etaxonomy.cdm.persistence.query.MatchMode;
|
39
|
|
40
|
/**
|
41
|
* @author pplitzner
|
42
|
* @since Mar 4, 2015
|
43
|
*
|
44
|
*/
|
45
|
public class AbcdGgbnParser {
|
46
|
|
47
|
//DNA Quality term
|
48
|
private static final String HIGH = "high";
|
49
|
private static final String MEDIUM = "medium";
|
50
|
private static final String LOW = "low";
|
51
|
private static final UUID HIGH_QUALITY_TERM = UUID.fromString("ec443c76-5987-4ec5-a66b-da207f70b47f");
|
52
|
private static final UUID MEDIUM_QUALITY_TERM = UUID.fromString("2a174892-1246-4807-9022-71ce8639346b");
|
53
|
private static final UUID LOW_QUALITY_TERM = UUID.fromString("a3bf12ff-b041-425f-bdaa-aa51da65eebc");
|
54
|
|
55
|
private static final String FORWARD = "forward";
|
56
|
|
57
|
private static final String REVERSE = "reverse";
|
58
|
|
59
|
private static final Logger logger = Logger.getLogger(AbcdGgbnParser.class);
|
60
|
|
61
|
private final String prefix = "ggbn:";
|
62
|
|
63
|
private final SpecimenImportReport report;
|
64
|
|
65
|
private final ICdmRepository cdmAppController;
|
66
|
|
67
|
public AbcdGgbnParser(SpecimenImportReport report, ICdmRepository cdmAppController) {
|
68
|
this.report = report;
|
69
|
this.cdmAppController = cdmAppController;
|
70
|
}
|
71
|
|
72
|
public DnaSample parse(NodeList ggbn, DnaSample dnaSample, Abcd206ImportState state) {
|
73
|
|
74
|
for(int i=0;i<ggbn.getLength();i++){
|
75
|
Node item = ggbn.item(i);
|
76
|
if(item instanceof Element){
|
77
|
Element element = (Element) item;
|
78
|
NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
|
79
|
NodeList volumeList = element.getElementsByTagName(prefix+"volume");
|
80
|
NodeList weightList = element.getElementsByTagName(prefix+"weight");
|
81
|
NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
|
82
|
NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
|
83
|
NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
|
84
|
NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
|
85
|
NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
|
86
|
NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
|
87
|
NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
|
88
|
|
89
|
dnaSample.setDnaQuality(parseDnaQuality(element, state));
|
90
|
|
91
|
parseGelImage(gelImageList, state);
|
92
|
parseAmplifications(amplificationsList, dnaSample, state);
|
93
|
}
|
94
|
}
|
95
|
return dnaSample;
|
96
|
}
|
97
|
|
98
|
private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
|
99
|
DnaQuality dnaQuality = DnaQuality.NewInstance();
|
100
|
|
101
|
NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
|
102
|
String purificationMethod = AbcdParseUtility.parseFirstTextContent(purificationMethodList);
|
103
|
dnaQuality.setPurificationMethod(purificationMethod);
|
104
|
|
105
|
NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
|
106
|
if(concentrationList.getLength()==1){
|
107
|
Node concentration = concentrationList.item(0);
|
108
|
dnaQuality.setConcentration(AbcdParseUtility.parseDouble(concentration, report));
|
109
|
if(concentration instanceof Element){
|
110
|
String unit = ((Element) concentration).getAttribute("Unit");
|
111
|
//TODO
|
112
|
// dnaQuality.setConcentrationUnit(concentrationUnit)
|
113
|
}
|
114
|
}
|
115
|
|
116
|
NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
|
117
|
dnaQuality.setRatioOfAbsorbance260_280(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_280List, report));
|
118
|
|
119
|
NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
|
120
|
dnaQuality.setRatioOfAbsorbance260_230(AbcdParseUtility.parseFirstDouble(ratioOfAbsorbance260_230List, report));
|
121
|
|
122
|
NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
|
123
|
if(qualityCheckDateList.item(0)!=null){
|
124
|
dnaQuality.setQualityCheckDate(AbcdParseUtility.parseFirstDateTime(qualityCheckDateList));
|
125
|
}
|
126
|
|
127
|
NodeList qualityList = element.getElementsByTagName(prefix+"quality");
|
128
|
String quality = AbcdParseUtility.parseFirstTextContent(qualityList);
|
129
|
if(LOW.equals(quality)){
|
130
|
dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(LOW_QUALITY_TERM));
|
131
|
}
|
132
|
else if(MEDIUM.equals(quality)){
|
133
|
dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(MEDIUM_QUALITY_TERM));
|
134
|
}
|
135
|
else if(HIGH.equals(quality)){
|
136
|
dnaQuality.setQualityTerm((OrderedTerm) state.getCdmRepository().getTermService().load(HIGH_QUALITY_TERM));
|
137
|
}
|
138
|
|
139
|
NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
|
140
|
|
141
|
|
142
|
return dnaQuality;
|
143
|
}
|
144
|
|
145
|
private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
|
146
|
if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
|
147
|
Element gelImage = (Element)gelImageList.item(0);
|
148
|
NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
|
149
|
NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
|
150
|
NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
|
151
|
NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
|
152
|
NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
|
153
|
NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
|
154
|
NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
|
155
|
|
156
|
}
|
157
|
|
158
|
}
|
159
|
|
160
|
private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
|
161
|
if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
|
162
|
AmplificationResult amplificationResult = AmplificationResult.NewInstance();
|
163
|
Amplification amplification = Amplification.NewInstance();
|
164
|
NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
|
165
|
for(int i=0;i<amplificationList.getLength();i++){
|
166
|
if(amplificationList.item(i) instanceof Element){
|
167
|
Element amplificationElement = (Element)amplificationList.item(i);
|
168
|
NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
|
169
|
NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
|
170
|
|
171
|
//amplification dna marker
|
172
|
NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
|
173
|
if(markerList.item(0)!=null){
|
174
|
String amplificationMarker = markerList.item(0).getTextContent();
|
175
|
DefinedTerm dnaMarker = null;
|
176
|
List<DefinedTerm> markersFound = cdmAppController.getTermService().findByTitleWithRestrictions(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
|
177
|
if(markersFound.size()==1){
|
178
|
dnaMarker = markersFound.get(0);
|
179
|
}
|
180
|
else{
|
181
|
dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
|
182
|
cdmAppController.getTermService().saveOrUpdate(dnaMarker);
|
183
|
}
|
184
|
amplification.setDnaMarker(dnaMarker);
|
185
|
}
|
186
|
|
187
|
NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
|
188
|
NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
|
189
|
NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
|
190
|
NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
|
191
|
NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
|
192
|
NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
|
193
|
NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
|
194
|
NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
|
195
|
NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
|
196
|
NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
|
197
|
|
198
|
//consensus sequence
|
199
|
NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
|
200
|
if(sequencingsList.item(0)!=null) {
|
201
|
if ( sequencingsList.item(0) instanceof Element){
|
202
|
Element el = (Element)sequencingsList.item(0);
|
203
|
parseAmplificationSequencings(el, amplification, amplificationResult, dnaSample, state);
|
204
|
}
|
205
|
}
|
206
|
|
207
|
|
208
|
parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
|
209
|
}
|
210
|
}
|
211
|
//check if amplification already exists (can only be checked after all fields are initialized because comparison is done on the label cache))
|
212
|
List<Amplification> matchingAmplifications = cdmAppController.getAmplificationService().findByLabelCache(amplification.getLabelCache(), MatchMode.EXACT, null, null, null, null, null).getRecords();
|
213
|
if(matchingAmplifications.size()==1){
|
214
|
amplification = matchingAmplifications.iterator().next();
|
215
|
}
|
216
|
cdmAppController.getAmplificationService().save(amplification);
|
217
|
amplificationResult.setAmplification(amplification);
|
218
|
dnaSample.addAmplificationResult(amplificationResult);
|
219
|
}
|
220
|
}
|
221
|
|
222
|
private void parseAmplificationPrimers(NodeList elementsByTagName) {
|
223
|
// TODO Auto-generated method stub
|
224
|
|
225
|
}
|
226
|
|
227
|
private void parseAmplificationSequencings(Element sequencings, Amplification amplification, AmplificationResult amplificationResult, DnaSample dnaSample, Abcd206ImportState state) {
|
228
|
NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
|
229
|
for(int i=0;i<sequencingList.getLength();i++){
|
230
|
Sequence sequence = Sequence.NewInstance("");
|
231
|
dnaSample.addSequence(sequence);
|
232
|
sequence.setDnaMarker(amplification.getDnaMarker());
|
233
|
|
234
|
if(sequencingList.item(i) instanceof Element){
|
235
|
Element sequencing = (Element)sequencingList.item(i);
|
236
|
|
237
|
//singleSequencings
|
238
|
NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
|
239
|
parseSingleSequencings(singleSequencingsList, amplification, amplificationResult, sequence);
|
240
|
//Consensus sequence
|
241
|
NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
|
242
|
sequence.setConsensusSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(consensusSequencesList)));
|
243
|
//sequence length
|
244
|
Double consensusSequenceLength = AbcdParseUtility.parseFirstDouble(sequencing.getElementsByTagName(prefix+"consensusSequenceLength"), report);
|
245
|
if(sequence.getConsensusSequence()!=null && consensusSequenceLength!=null){
|
246
|
//TODO: this can be different from the actual length in ABCD but not in CDM!
|
247
|
sequence.getConsensusSequence().setLength(consensusSequenceLength.intValue());
|
248
|
}
|
249
|
//contig file URL
|
250
|
NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
|
251
|
URI uri = AbcdParseUtility.parseFirstUri(consensusSequenceChromatogramFileURIList, report);
|
252
|
if (uri != null && uri.toString().endsWith("fasta")){
|
253
|
state.putSequenceDataStableIdentifier(uri);
|
254
|
}else{
|
255
|
Media contigFile = Media.NewInstance(uri, null, null, null);
|
256
|
sequence.setContigFile(contigFile);
|
257
|
}
|
258
|
//genetic Accession
|
259
|
NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
|
260
|
parseGeneticAccession(geneticAccessionList, sequence);
|
261
|
|
262
|
//references
|
263
|
NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
|
264
|
if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
|
265
|
parseSequencingReferences((Element) referencesList.item(0), sequence);
|
266
|
}
|
267
|
}
|
268
|
}
|
269
|
// if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
|
270
|
// NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
|
271
|
|
272
|
}
|
273
|
|
274
|
private void parseSequencingReferences(Element references, Sequence sequence) {
|
275
|
NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
|
276
|
for(int i=0;i<referenceList.getLength();i++){
|
277
|
if(referenceList.item(i) instanceof Element){
|
278
|
Element element = (Element)referenceList.item(i);
|
279
|
NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
|
280
|
Reference reference = AbcdParseUtility.parseFirstReference(referenceCitationList, cdmAppController);
|
281
|
sequence.addCitation(reference);
|
282
|
}
|
283
|
}
|
284
|
}
|
285
|
|
286
|
private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, AmplificationResult amplificationResult, Sequence sequence) {
|
287
|
if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
|
288
|
Element singleSequencings = (Element)singleSequencingsList.item(0);
|
289
|
NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
|
290
|
for(int i=0;i<singleSequencingList.getLength();i++){
|
291
|
//single read
|
292
|
SingleRead singleRead = SingleRead.NewInstance();
|
293
|
SingleReadAlignment.NewInstance(sequence, singleRead);
|
294
|
amplificationResult.addSingleRead(singleRead);
|
295
|
if(singleSequencingList.item(i) instanceof Element){
|
296
|
Element singleSequencing = (Element)singleSequencingList.item(i);
|
297
|
NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
|
298
|
//read direction
|
299
|
String singleReadDirection = AbcdParseUtility.parseFirstTextContent(sequencingDirectionList);
|
300
|
if(singleReadDirection.equals(FORWARD)){
|
301
|
singleRead.setDirection(SequenceDirection.Forward);
|
302
|
}
|
303
|
else if(singleReadDirection.equals(REVERSE)){
|
304
|
singleRead.setDirection(SequenceDirection.Reverse);
|
305
|
}
|
306
|
//read pherogram URI
|
307
|
NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
|
308
|
singleRead.setPherogram(Media.NewInstance(AbcdParseUtility.parseFirstUri(chromatogramFileURIList, report), null, null, null));
|
309
|
NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
|
310
|
parseSequencingPrimers(sequencingPrimersList, singleRead, amplification);
|
311
|
}
|
312
|
}
|
313
|
}
|
314
|
}
|
315
|
|
316
|
private void parseSequencingPrimers(NodeList sequencingPrimersList, SingleRead singleRead, Amplification amplification) {
|
317
|
if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
|
318
|
Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
|
319
|
NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
|
320
|
for(int i=0;i<sequencingPrimerList.getLength();i++){
|
321
|
if(sequencingPrimerList.item(i) instanceof Element){
|
322
|
Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
|
323
|
//primer name
|
324
|
String primerName = AbcdParseUtility.parseFirstTextContent(sequencingPrimer.getElementsByTagName(prefix+"primerName"));
|
325
|
//check if primer already exists
|
326
|
List<Primer> matchingPrimers = cdmAppController.getPrimerService().findByLabel(primerName, MatchMode.EXACT, null, null, null, null, null).getRecords();
|
327
|
Primer primer = null;
|
328
|
if(matchingPrimers.size()==1){
|
329
|
primer = matchingPrimers.iterator().next();
|
330
|
return;
|
331
|
}
|
332
|
else{
|
333
|
primer = Primer.NewInstance(null);
|
334
|
primer.setLabel(primerName);
|
335
|
}
|
336
|
singleRead.setPrimer(primer);
|
337
|
//primer sequence
|
338
|
NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
|
339
|
primer.setSequence(SequenceString.NewInstance(AbcdParseUtility.parseFirstTextContent(primerSequenceList)));
|
340
|
//primer direction
|
341
|
String direction = parseFirstAttribute("Direction", primerSequenceList);
|
342
|
if(direction!=null){
|
343
|
if(direction.equals(FORWARD)){
|
344
|
amplification.setForwardPrimer(primer);
|
345
|
}
|
346
|
else if(direction.equals(REVERSE)){
|
347
|
amplification.setReversePrimer(primer);
|
348
|
}
|
349
|
}
|
350
|
//reference citation
|
351
|
NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
|
352
|
String primerReferenceCitation = AbcdParseUtility.parseFirstTextContent(primerReferenceCitationList);
|
353
|
Reference reference = AbcdParseUtility.parseFirstReference(primerReferenceCitationList, cdmAppController);
|
354
|
primer.setPublishedIn(reference);
|
355
|
|
356
|
cdmAppController.getPrimerService().save(primer);
|
357
|
}
|
358
|
}
|
359
|
}
|
360
|
}
|
361
|
|
362
|
private String parseFirstAttribute(String attributeName, NodeList nodeList) {
|
363
|
String attribute = null;
|
364
|
if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
|
365
|
Element element = (Element)nodeList.item(0);
|
366
|
attribute = element.getAttribute(attributeName);
|
367
|
}
|
368
|
return attribute;
|
369
|
}
|
370
|
|
371
|
private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
|
372
|
for(int i=0;i<geneticAccessionList.getLength();i++){
|
373
|
if(geneticAccessionList.item(i) instanceof Element){
|
374
|
//genetic accession number
|
375
|
NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
|
376
|
sequence.setGeneticAccessionNumber(AbcdParseUtility.parseFirstTextContent(geneticAccessionNumberList));
|
377
|
|
378
|
//genetic accession number uri
|
379
|
NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
|
380
|
//TODO: this is different from the geneticAccessionNumber
|
381
|
|
382
|
}
|
383
|
}
|
384
|
}
|
385
|
|
386
|
}
|