1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2015 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
package eu.etaxonomy.cdm.io.specimen.abcd206.in.ggbn;
|
11
|
|
12
|
import java.net.URI;
|
13
|
import java.util.List;
|
14
|
|
15
|
import org.apache.log4j.Logger;
|
16
|
import org.joda.time.DateTime;
|
17
|
import org.w3c.dom.Element;
|
18
|
import org.w3c.dom.Node;
|
19
|
import org.w3c.dom.NodeList;
|
20
|
|
21
|
import eu.etaxonomy.cdm.api.application.ICdmApplicationConfiguration;
|
22
|
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportReport;
|
23
|
import eu.etaxonomy.cdm.io.specimen.abcd206.in.Abcd206ImportState;
|
24
|
import eu.etaxonomy.cdm.model.common.DefinedTerm;
|
25
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
26
|
import eu.etaxonomy.cdm.model.media.Media;
|
27
|
import eu.etaxonomy.cdm.model.molecular.Amplification;
|
28
|
import eu.etaxonomy.cdm.model.molecular.AmplificationResult;
|
29
|
import eu.etaxonomy.cdm.model.molecular.DnaQuality;
|
30
|
import eu.etaxonomy.cdm.model.molecular.DnaSample;
|
31
|
import eu.etaxonomy.cdm.model.molecular.Primer;
|
32
|
import eu.etaxonomy.cdm.model.molecular.Sequence;
|
33
|
import eu.etaxonomy.cdm.model.molecular.SequenceDirection;
|
34
|
import eu.etaxonomy.cdm.model.molecular.SequenceString;
|
35
|
import eu.etaxonomy.cdm.model.molecular.SingleRead;
|
36
|
import eu.etaxonomy.cdm.model.molecular.SingleReadAlignment;
|
37
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
38
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
39
|
import eu.etaxonomy.cdm.persistence.query.MatchMode;
|
40
|
|
41
|
/**
|
42
|
* @author pplitzner
|
43
|
* @date Mar 4, 2015
|
44
|
*
|
45
|
*/
|
46
|
public class AbcdGgbnParser {
|
47
|
|
48
|
/** Direction value that is compared against sequencing and primer directions to detect the forward direction. */
private static final String FORWARD = "forward";

/** Direction value that is compared against sequencing and primer directions to detect the reverse direction. */
private static final String REVERSE = "reverse";

private static final Logger logger = Logger.getLogger(AbcdGgbnParser.class);

/** Prefix that is prepended to the GGBN tag names looked up by this parser. */
private final String prefix = "ggbn:";

/** Import report handed in by the caller. */
private final Abcd206ImportReport report;

/** Gives access to the term and reference services used to find or store terms and references. */
private final ICdmApplicationConfiguration cdmAppController;

/**
 * Creates a parser that keeps the given report and application configuration for later use.
 *
 * @param report the import report
 * @param cdmAppController the application configuration providing the CDM services
 */
public AbcdGgbnParser(Abcd206ImportReport report, ICdmApplicationConfiguration cdmAppController) {
    this.cdmAppController = cdmAppController;
    this.report = report;
}
|
64
|
|
65
|
public DnaSample parse(NodeList ggbn, Abcd206ImportState state) {
|
66
|
DnaSample dnaSample = DnaSample.NewInstance();
|
67
|
|
68
|
for(int i=0;i<ggbn.getLength();i++){
|
69
|
Node item = ggbn.item(i);
|
70
|
if(item instanceof Element){
|
71
|
Element element = (Element) item;
|
72
|
NodeList methodDeterminationConcentrationAndRatiosList = element.getElementsByTagName(prefix+"methodDeterminationConcentrationAndRatios");
|
73
|
NodeList volumeList = element.getElementsByTagName(prefix+"volume");
|
74
|
NodeList weightList = element.getElementsByTagName(prefix+"weight");
|
75
|
NodeList methodDeterminationWeightList = element.getElementsByTagName(prefix+"methodDeterminationWeight");
|
76
|
NodeList DNADNAHybridizationList = element.getElementsByTagName(prefix+"DNADNAHybridization");
|
77
|
NodeList DNAMeltingPointList = element.getElementsByTagName(prefix+"DNAMeltingPoint");
|
78
|
NodeList estimatedSizeList = element.getElementsByTagName(prefix+"estimated_size");
|
79
|
NodeList poolDnaExtractsList = element.getElementsByTagName(prefix+"pool_dna_extracts");
|
80
|
NodeList gelImageList = element.getElementsByTagName(prefix+"gelImage");
|
81
|
NodeList amplificationsList = element.getElementsByTagName(prefix+"Amplifications");
|
82
|
|
83
|
// dnaSample.setDnaQuality(parseDnaQuality(element, state));
|
84
|
|
85
|
parseGelImage(gelImageList, state);
|
86
|
parseAmplifications(amplificationsList, dnaSample, state);
|
87
|
}
|
88
|
}
|
89
|
return dnaSample;
|
90
|
}
|
91
|
|
92
|
private DnaQuality parseDnaQuality(Element element, Abcd206ImportState state) {
|
93
|
DnaQuality dnaQuality = DnaQuality.NewInstance();
|
94
|
|
95
|
NodeList purificationMethodList = element.getElementsByTagName(prefix+"purificationMethod");
|
96
|
// dnaQuality.setPurificationMethod(purificationMethod)
|
97
|
|
98
|
NodeList concentrationList = element.getElementsByTagName(prefix+"concentration");
|
99
|
if(concentrationList.getLength()==1){
|
100
|
Node concentration = concentrationList.item(0);
|
101
|
dnaQuality.setConcentration(parseDouble(concentration));
|
102
|
if(concentration instanceof Element){
|
103
|
String unit = ((Element) concentration).getAttribute("Unit");
|
104
|
// dnaQuality.setConcentrationUnit(concentrationUnit)
|
105
|
}
|
106
|
}
|
107
|
|
108
|
NodeList ratioOfAbsorbance260_280List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_280");
|
109
|
dnaQuality.setRatioOfAbsorbance260_280(parseFirstNodeDouble(ratioOfAbsorbance260_280List));
|
110
|
|
111
|
NodeList ratioOfAbsorbance260_230List = element.getElementsByTagName(prefix+"ratioOfAbsorbance260_230");
|
112
|
dnaQuality.setRatioOfAbsorbance260_230(parseFirstNodeDouble(ratioOfAbsorbance260_230List));
|
113
|
|
114
|
NodeList qualityCheckDateList = element.getElementsByTagName(prefix+"qualityCheckDate");
|
115
|
if(qualityCheckDateList.item(0)!=null){
|
116
|
dnaQuality.setQualityCheckDate(DateTime.parse(qualityCheckDateList.item(0).getTextContent()));
|
117
|
}
|
118
|
|
119
|
NodeList qualityList = element.getElementsByTagName(prefix+"quality");
|
120
|
NodeList qualityRemarksList = element.getElementsByTagName(prefix+"qualityRemarks");
|
121
|
|
122
|
// dnaQuality.setQualityTerm(qualityTerm)
|
123
|
|
124
|
return dnaQuality;
|
125
|
}
|
126
|
|
127
|
private void parseGelImage(NodeList gelImageList, Abcd206ImportState state) {
|
128
|
if(gelImageList.item(0)!=null && gelImageList.item(0) instanceof Element){
|
129
|
Element gelImage = (Element)gelImageList.item(0);
|
130
|
NodeList fileURIList = gelImage.getElementsByTagName("fileURI");
|
131
|
NodeList gelVoltageList = gelImage.getElementsByTagName("gelVoltage");
|
132
|
NodeList gelConcentrationList = gelImage.getElementsByTagName("gelConcentration");
|
133
|
NodeList gelDurationList = gelImage.getElementsByTagName("gelDuration");
|
134
|
NodeList gelLadderList = gelImage.getElementsByTagName("gelLadder");
|
135
|
NodeList gelStainList = gelImage.getElementsByTagName("gelStain");
|
136
|
NodeList gelRemarksList = gelImage.getElementsByTagName("gelRemarks");
|
137
|
|
138
|
}
|
139
|
|
140
|
}
|
141
|
|
142
|
private void parseAmplifications(NodeList amplificationsList, DnaSample dnaSample, Abcd206ImportState state) {
|
143
|
if(amplificationsList.item(0)!=null && amplificationsList.item(0) instanceof Element){
|
144
|
AmplificationResult amplificationResult = AmplificationResult.NewInstance();
|
145
|
Amplification amplification = Amplification.NewInstance();
|
146
|
NodeList amplificationList = ((Element) amplificationsList.item(0)).getElementsByTagName(prefix+"amplification");
|
147
|
for(int i=0;i<amplificationList.getLength();i++){
|
148
|
if(amplificationList.item(i) instanceof Element){
|
149
|
Element amplificationElement = (Element)amplificationList.item(i);
|
150
|
NodeList amplificationDateList = amplificationElement.getElementsByTagName(prefix+"amplificationDate");
|
151
|
NodeList amplificationStaffList = amplificationElement.getElementsByTagName(prefix+"amplificationStaff");
|
152
|
|
153
|
NodeList markerList = amplificationElement.getElementsByTagName(prefix+"marker");
|
154
|
if(markerList.item(0)!=null){
|
155
|
String amplificationMarker = markerList.item(0).getTextContent();
|
156
|
DefinedTerm dnaMarker = null;
|
157
|
List<DefinedTermBase> markersFound = cdmAppController.getTermService().findByTitle(DefinedTerm.class, amplificationMarker, MatchMode.EXACT, null, null, null, null, null).getRecords();
|
158
|
if(markersFound.size()==1){
|
159
|
dnaMarker = (DefinedTerm) markersFound.get(0);
|
160
|
}
|
161
|
else{
|
162
|
dnaMarker = DefinedTerm.NewDnaMarkerInstance(amplificationMarker, amplificationMarker, amplificationMarker);
|
163
|
cdmAppController.getTermService().saveOrUpdate(dnaMarker);
|
164
|
}
|
165
|
amplification.setDnaMarker(dnaMarker);
|
166
|
}
|
167
|
|
168
|
NodeList markerSubfragmentList = amplificationElement.getElementsByTagName(prefix+"markerSubfragment");
|
169
|
NodeList amplificationSuccessList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccess");
|
170
|
NodeList amplificationSuccessDetailsList = amplificationElement.getElementsByTagName(prefix+"amplificationSuccessDetails");
|
171
|
NodeList amplificationMethodList = amplificationElement.getElementsByTagName(prefix+"amplificationMethod");
|
172
|
NodeList purificationMethodList = amplificationElement.getElementsByTagName(prefix+"purificationMethod");
|
173
|
NodeList libReadsSeqdList = amplificationElement.getElementsByTagName(prefix+"lib_reads_seqd");
|
174
|
NodeList libScreenList = amplificationElement.getElementsByTagName(prefix+"lib_screen");
|
175
|
NodeList libVectorList = amplificationElement.getElementsByTagName(prefix+"lib_vector");
|
176
|
NodeList libConstMethList = amplificationElement.getElementsByTagName(prefix+"lib_const_meth");
|
177
|
NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
|
178
|
|
179
|
NodeList sequencingsList = amplificationElement.getElementsByTagName(prefix+"Sequencings");
|
180
|
if(sequencingsList.item(0)!=null && sequencingsList.item(0) instanceof Element){
|
181
|
parseAmplificationSequencings((Element)sequencingsList.item(0), amplification, dnaSample, state);
|
182
|
}
|
183
|
parseAmplificationPrimers(amplificationElement.getElementsByTagName(prefix+"AmplificationPrimers"));
|
184
|
}
|
185
|
}
|
186
|
amplificationResult.setAmplification(amplification);
|
187
|
dnaSample.addAmplificationResult(amplificationResult);
|
188
|
}
|
189
|
}
|
190
|
|
191
|
private void parseAmplificationPrimers(NodeList elementsByTagName) {
|
192
|
// TODO Auto-generated method stub
|
193
|
|
194
|
}
|
195
|
|
196
|
private void parseAmplificationSequencings(Element sequencings, Amplification amplification, DnaSample dnaSample, Abcd206ImportState state) {
|
197
|
NodeList sequencingList = sequencings.getElementsByTagName(prefix+"sequencing");
|
198
|
for(int i=0;i<sequencingList.getLength();i++){
|
199
|
Sequence sequence = Sequence.NewInstance("");
|
200
|
dnaSample.addSequence(sequence);
|
201
|
|
202
|
if(sequencingList.item(i) instanceof Element){
|
203
|
Element sequencing = (Element)sequencingList.item(i);
|
204
|
|
205
|
//singleSequencings
|
206
|
NodeList singleSequencingsList = sequencing.getElementsByTagName(prefix+"SingleSequencings");
|
207
|
parseSingleSequencings(singleSequencingsList, amplification, sequence);
|
208
|
//Consensus sequence
|
209
|
NodeList consensusSequencesList = sequencing.getElementsByTagName(prefix+"consensusSequence");
|
210
|
sequence.setConsensusSequence(SequenceString.NewInstance(parseFirstTextContent(consensusSequencesList)));
|
211
|
//sequence length
|
212
|
NodeList consensusSequencesLengthList = sequencing.getElementsByTagName(prefix+"consensusSequenceLength");
|
213
|
if(sequence.getConsensusSequence()!=null){
|
214
|
//TODO: this can be different from the actual length in ABCD but not in CDM!
|
215
|
sequence.getConsensusSequence().setLength(parseFirstNodeDouble(consensusSequencesLengthList).intValue());
|
216
|
}
|
217
|
//contig file URL
|
218
|
NodeList consensusSequenceChromatogramFileURIList = sequencing.getElementsByTagName(prefix+"consensusSequenceChromatogramFileURI");
|
219
|
URI uri = parseFirstUri(consensusSequenceChromatogramFileURIList);
|
220
|
Media contigFile = Media.NewInstance(uri, null, null, null);
|
221
|
sequence.setContigFile(contigFile);
|
222
|
|
223
|
//genetic Accession
|
224
|
NodeList geneticAccessionList = sequencing.getElementsByTagName(prefix+"geneticAccession");
|
225
|
parseGeneticAccession(geneticAccessionList, sequence);
|
226
|
|
227
|
//references
|
228
|
NodeList referencesList = sequencing.getElementsByTagName(prefix+"References");
|
229
|
if(referencesList.item(0)!=null && referencesList.item(0) instanceof Element){
|
230
|
parseSequencingReferences((Element) referencesList.item(0), sequence);
|
231
|
}
|
232
|
}
|
233
|
}
|
234
|
// if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
|
235
|
// NodeList plasmidList = amplificationElement.getElementsByTagName(prefix+"plasmid");
|
236
|
|
237
|
}
|
238
|
|
239
|
private void parseSequencingReferences(Element references, Sequence sequence) {
|
240
|
NodeList referenceList = references.getElementsByTagName(prefix+"Reference");
|
241
|
for(int i=0;i<referenceList.getLength();i++){
|
242
|
if(referenceList.item(i) instanceof Element){
|
243
|
Element element = (Element)referenceList.item(i);
|
244
|
NodeList referenceCitationList = element.getElementsByTagName(prefix+"ReferenceCitation");
|
245
|
String referenceCitation = parseFirstTextContent(referenceCitationList);
|
246
|
List<Reference> matchedReferences = cdmAppController.getReferenceService().findByTitle(Reference.class, referenceCitation, MatchMode.EXACT, null, null, null, null, null).getRecords();
|
247
|
Reference<?> reference;
|
248
|
if(matchedReferences.size()==1){
|
249
|
reference = matchedReferences.iterator().next();
|
250
|
}
|
251
|
else{
|
252
|
reference = ReferenceFactory.newGeneric();
|
253
|
reference.setTitle(referenceCitation);
|
254
|
cdmAppController.getReferenceService().saveOrUpdate(reference);
|
255
|
}
|
256
|
sequence.addCitation(reference);
|
257
|
}
|
258
|
}
|
259
|
|
260
|
}
|
261
|
|
262
|
private void parseSingleSequencings(NodeList singleSequencingsList, Amplification amplification, Sequence sequence) {
|
263
|
if(singleSequencingsList.item(0)!=null && singleSequencingsList.item(0) instanceof Element){
|
264
|
Element singleSequencings = (Element)singleSequencingsList.item(0);
|
265
|
NodeList singleSequencingList = singleSequencings.getElementsByTagName(prefix+"singleSequencing");
|
266
|
for(int i=0;i<singleSequencingList.getLength();i++){
|
267
|
//single read
|
268
|
SingleRead singleRead = SingleRead.NewInstance();
|
269
|
SingleReadAlignment.NewInstance(sequence, singleRead);
|
270
|
if(singleSequencingList.item(i) instanceof Element){
|
271
|
Element singleSequencing = (Element)singleSequencingList.item(i);
|
272
|
NodeList sequencingDirectionList = singleSequencing.getElementsByTagName(prefix+"sequencingDirection");
|
273
|
//read direction
|
274
|
String singleReadDirection = parseFirstTextContent(sequencingDirectionList);
|
275
|
if(singleReadDirection.equals(FORWARD)){
|
276
|
singleRead.setDirection(SequenceDirection.Forward);
|
277
|
}
|
278
|
else if(singleReadDirection.equals(REVERSE)){
|
279
|
singleRead.setDirection(SequenceDirection.Reverse);
|
280
|
}
|
281
|
//read pherogram URI
|
282
|
NodeList chromatogramFileURIList = singleSequencing.getElementsByTagName(prefix+"chromatogramFileURI");
|
283
|
singleRead.setPherogram(Media.NewInstance(parseFirstUri(chromatogramFileURIList), null, null, null));
|
284
|
NodeList sequencingPrimersList = singleSequencing.getElementsByTagName(prefix+"SequencingPrimers");
|
285
|
parseSequencingPrimers(sequencingPrimersList, amplification);
|
286
|
}
|
287
|
}
|
288
|
}
|
289
|
}
|
290
|
|
291
|
/**
|
292
|
* @param sequencingPrimersList
|
293
|
* @param amplification
|
294
|
*/
|
295
|
private void parseSequencingPrimers(NodeList sequencingPrimersList, Amplification amplification) {
|
296
|
if(sequencingPrimersList.item(0)!=null && sequencingPrimersList.item(0) instanceof Element){
|
297
|
Primer primer = Primer.NewInstance(null);
|
298
|
Element sequencingPrimers = (Element)sequencingPrimersList.item(0);
|
299
|
NodeList sequencingPrimerList = sequencingPrimers.getElementsByTagName(prefix+"sequencingPrimer");
|
300
|
for(int i=0;i<sequencingPrimerList.getLength();i++){
|
301
|
if(sequencingPrimerList.item(i) instanceof Element){
|
302
|
Element sequencingPrimer = (Element)sequencingPrimerList.item(i);
|
303
|
//primer sequence
|
304
|
NodeList primerSequenceList = sequencingPrimer.getElementsByTagName(prefix+"primerSequence");
|
305
|
primer.setSequence(SequenceString.NewInstance(parseFirstTextContent(primerSequenceList)));
|
306
|
//primer direction
|
307
|
String direction = parseFirstAttribute("Direction", primerSequenceList);
|
308
|
if(direction!=null){
|
309
|
if(direction.equals(FORWARD)){
|
310
|
amplification.setForwardPrimer(primer);
|
311
|
}
|
312
|
else if(direction.equals(REVERSE)){
|
313
|
amplification.setReversePrimer(primer);
|
314
|
}
|
315
|
}
|
316
|
//primer name
|
317
|
NodeList primerNameList = sequencingPrimer.getElementsByTagName(prefix+"primerName");
|
318
|
primer.setLabel(parseFirstTextContent(primerNameList));
|
319
|
//reference citation
|
320
|
NodeList primerReferenceCitationList = sequencingPrimer.getElementsByTagName(prefix+"primerReferenceCitation");
|
321
|
String primerReferenceCitation = parseFirstTextContent(primerReferenceCitationList);
|
322
|
List<Reference> matchingReferences = cdmAppController.getReferenceService().findByTitle(Reference.class, primerReferenceCitation, MatchMode.EXACT, null, null, null, null, null).getRecords();
|
323
|
Reference<?> primerReference;
|
324
|
if(matchingReferences.size()==1){
|
325
|
primerReference = matchingReferences.iterator().next();
|
326
|
}
|
327
|
else{
|
328
|
primerReference = ReferenceFactory.newGeneric();
|
329
|
primerReference.setTitle(primerReferenceCitation);
|
330
|
cdmAppController.getReferenceService().saveOrUpdate(primerReference);
|
331
|
}
|
332
|
primer.setPublishedIn(primerReference);
|
333
|
}
|
334
|
}
|
335
|
}
|
336
|
}
|
337
|
|
338
|
private String parseFirstAttribute(String attributeName, NodeList nodeList) {
|
339
|
String attribute = null;
|
340
|
if(nodeList.item(0)!=null && nodeList.item(0) instanceof Element){
|
341
|
Element element = (Element)nodeList.item(0);
|
342
|
attribute = element.getAttribute(attributeName);
|
343
|
}
|
344
|
return attribute;
|
345
|
}
|
346
|
|
347
|
private void parseGeneticAccession(NodeList geneticAccessionList, Sequence sequence) {
|
348
|
for(int i=0;i<geneticAccessionList.getLength();i++){
|
349
|
if(geneticAccessionList.item(i) instanceof Element){
|
350
|
//genetic accession number
|
351
|
NodeList geneticAccessionNumberList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumber");
|
352
|
sequence.setGeneticAccessionNumber(parseFirstTextContent(geneticAccessionNumberList));
|
353
|
|
354
|
//genetic accession number uri
|
355
|
NodeList geneticAccessionNumberUriList = ((Element)geneticAccessionList.item(i)).getElementsByTagName(prefix+"geneticAccessionNumberURI");
|
356
|
//TODO: this is different from the geneticAccessionNumber
|
357
|
|
358
|
}
|
359
|
}
|
360
|
}
|
361
|
|
362
|
private URI parseFirstUri(NodeList nodeList){
|
363
|
URI uri = null;
|
364
|
if(nodeList.item(0)!=null){
|
365
|
String textContent = nodeList.item(0).getTextContent();
|
366
|
if(textContent!=null){
|
367
|
try {
|
368
|
uri = URI.create(textContent);
|
369
|
} catch (IllegalArgumentException e) {
|
370
|
//nothing
|
371
|
}
|
372
|
}
|
373
|
}
|
374
|
return uri;
|
375
|
}
|
376
|
|
377
|
private String parseFirstTextContent(NodeList nodeList){
|
378
|
String string = null;
|
379
|
if(nodeList.getLength()>0){
|
380
|
string = nodeList.item(0).getTextContent().replace("\n", "").replaceAll("( )+", " ").trim();
|
381
|
}
|
382
|
return string;
|
383
|
}
|
384
|
|
385
|
private Double parseFirstNodeDouble(NodeList nodeList){
|
386
|
if(nodeList.getLength()>0){
|
387
|
return parseDouble(nodeList.item(0));
|
388
|
}
|
389
|
return null;
|
390
|
}
|
391
|
|
392
|
private Double parseDouble(Node node){
|
393
|
String message = "Could not parse double value for node " + node.getNodeName();
|
394
|
Double doubleValue = null;
|
395
|
try{
|
396
|
String textContent = node.getTextContent();
|
397
|
//remove 1000 dots
|
398
|
textContent = textContent.replace(".","");
|
399
|
//convert commmas
|
400
|
textContent = textContent.replace(",",".");
|
401
|
doubleValue = Double.parseDouble(textContent);
|
402
|
} catch (NullPointerException npe){
|
403
|
logger.error(message, npe);
|
404
|
} catch (NumberFormatException nfe){
|
405
|
logger.error(message, nfe);
|
406
|
}
|
407
|
return doubleValue;
|
408
|
}
|
409
|
|
410
|
}
|