/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.algaterra;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Map;
|
17
|
import java.util.Set;
|
18
|
|
19
|
import org.apache.commons.lang.StringUtils;
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
|
24
|
import eu.etaxonomy.cdm.io.algaterra.validation.AlgaTerraDnaImportValidator;
|
25
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator;
|
26
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
|
27
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
28
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
29
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
30
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
31
|
import eu.etaxonomy.cdm.model.molecular.DnaSample;
|
32
|
import eu.etaxonomy.cdm.model.molecular.Locus;
|
33
|
import eu.etaxonomy.cdm.model.molecular.Sequence;
|
34
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
35
|
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
|
36
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
|
37
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
38
|
|
39
|
|
40
|
/**
|
41
|
* @author a.mueller
|
42
|
* @created 01.09.2012
|
43
|
*/
|
44
|
@Component
|
45
|
public class AlgaTerraDnaImport extends AlgaTerraSpecimenImportBase {
|
46
|
private static final Logger logger = Logger.getLogger(AlgaTerraDnaImport.class);
|
47
|
|
48
|
|
49
|
private static int modCount = 5000;
|
50
|
private static final String pluralString = "dna facts";
|
51
|
private static final String dbTableName = "DNAFact"; //??
|
52
|
|
53
|
|
54
|
public AlgaTerraDnaImport(){
|
55
|
super(dbTableName, pluralString);
|
56
|
}
|
57
|
|
58
|
|
59
|
|
60
|
/* (non-Javadoc)
|
61
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
|
62
|
*/
|
63
|
@Override
|
64
|
protected String getIdQuery(BerlinModelImportState state) {
|
65
|
String result = " SELECT df.DNAFactId " +
|
66
|
" FROM DNAFact df INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " +
|
67
|
" ORDER BY df.DNAFactID ";
|
68
|
return result;
|
69
|
}
|
70
|
|
71
|
/* (non-Javadoc)
|
72
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
73
|
*/
|
74
|
@Override
|
75
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
76
|
String strQuery =
|
77
|
" SELECT * " +
|
78
|
" FROM DNAFact df INNER JOIN Fact f ON f.ExtensionFk = df.DNAFactID " +
|
79
|
" WHERE (df.DNAFactId IN (" + ID_LIST_TOKEN + ") )"
|
80
|
+ " ORDER BY DNAFactID "
|
81
|
;
|
82
|
return strQuery;
|
83
|
}
|
84
|
|
85
|
/* (non-Javadoc)
|
86
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
|
87
|
*/
|
88
|
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState bmState) {
|
89
|
boolean success = true;
|
90
|
|
91
|
AlgaTerraImportState state = (AlgaTerraImportState)bmState;
|
92
|
try {
|
93
|
// makeVocabulariesAndFeatures(state);
|
94
|
} catch (Exception e1) {
|
95
|
logger.warn("Exception occurred when trying to create Ecofact vocabularies: " + e1.getMessage());
|
96
|
e1.printStackTrace();
|
97
|
}
|
98
|
Set<SpecimenOrObservationBase> objectsToSave = new HashSet<SpecimenOrObservationBase>();
|
99
|
|
100
|
//TODO do we still need this map? EcoFacts are not handled separate from Facts.
|
101
|
//However, they have duplicates on derived unit level. Also check duplicateFk.
|
102
|
Map<String, FieldObservation> ecoFactFieldObservationMap = (Map<String, FieldObservation>) partitioner.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE);
|
103
|
|
104
|
ResultSet rs = partitioner.getResultSet();
|
105
|
|
106
|
try {
|
107
|
|
108
|
int i = 0;
|
109
|
|
110
|
//for each reference
|
111
|
while (rs.next()){
|
112
|
|
113
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
114
|
|
115
|
int dnaFactId = rs.getInt("DNAFactId");
|
116
|
int ecoFactFk = nullSafeInt(rs, "EcoFactFk");
|
117
|
String sequenceStr = rs.getString("PlainSequence");
|
118
|
String keywordsStr = rs.getString("Keywords");
|
119
|
String locusStr = rs.getString("Locus");
|
120
|
String definitionStr = rs.getString("Definition");
|
121
|
|
122
|
try {
|
123
|
|
124
|
//source ref
|
125
|
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
126
|
|
127
|
//facade
|
128
|
DnaSample dnaSample = DnaSample.NewInstance();
|
129
|
|
130
|
Sequence sequence = Sequence.NewInstance(sequenceStr);
|
131
|
dnaSample.addSequences(sequence);
|
132
|
|
133
|
Locus locus = Locus.NewInstance(locusStr, definitionStr);
|
134
|
|
135
|
sequence.setLocus(locus);
|
136
|
|
137
|
|
138
|
|
139
|
// handleFirstDerivedSpecimen(rs, facade, state, partitioner);
|
140
|
// handleEcoFactSpecificDerivedUnit(rs,facade, state);
|
141
|
|
142
|
|
143
|
objectsToSave.add(dnaSample);
|
144
|
|
145
|
|
146
|
} catch (Exception e) {
|
147
|
logger.warn("Exception in ecoFact: ecoFactId " + dnaFactId + ". " + e.getMessage());
|
148
|
e.printStackTrace();
|
149
|
}
|
150
|
|
151
|
}
|
152
|
|
153
|
// logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
154
|
|
155
|
logger.warn("Taxa to save: " + objectsToSave.size());
|
156
|
getOccurrenceService().save(objectsToSave);
|
157
|
|
158
|
return success;
|
159
|
} catch (SQLException e) {
|
160
|
logger.error("SQLException:" + e);
|
161
|
return false;
|
162
|
}
|
163
|
}
|
164
|
|
165
|
protected String getDerivedUnitNameSpace(){
|
166
|
return ECO_FACT_DERIVED_UNIT_NAMESPACE;
|
167
|
}
|
168
|
|
169
|
protected String getFieldObservationNameSpace(){
|
170
|
return ECO_FACT_FIELD_OBSERVATION_NAMESPACE;
|
171
|
}
|
172
|
|
173
|
|
174
|
private DerivedUnitType makeDerivedUnitType(String recordBasis) {
|
175
|
DerivedUnitType result = null;
|
176
|
if (StringUtils.isBlank(recordBasis)){
|
177
|
result = DerivedUnitType.DerivedUnit;
|
178
|
} else if (recordBasis.equalsIgnoreCase("FossileSpecimen")){
|
179
|
result = DerivedUnitType.Fossil;
|
180
|
}else if (recordBasis.equalsIgnoreCase("HumanObservation")){
|
181
|
result = DerivedUnitType.Observation;
|
182
|
}else if (recordBasis.equalsIgnoreCase("Literature")){
|
183
|
logger.warn("Literature record basis not yet supported");
|
184
|
result = DerivedUnitType.DerivedUnit;
|
185
|
}else if (recordBasis.equalsIgnoreCase("LivingSpecimen")){
|
186
|
result = DerivedUnitType.LivingBeing;
|
187
|
}else if (recordBasis.equalsIgnoreCase("MachineObservation")){
|
188
|
logger.warn("MachineObservation record basis not yet supported");
|
189
|
result = DerivedUnitType.Observation;
|
190
|
}else if (recordBasis.equalsIgnoreCase("PreservedSpecimen")){
|
191
|
result = DerivedUnitType.Specimen;
|
192
|
}
|
193
|
return result;
|
194
|
}
|
195
|
|
196
|
/* (non-Javadoc)
|
197
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
198
|
*/
|
199
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
200
|
String nameSpace;
|
201
|
Class cdmClass;
|
202
|
Set<String> idSet;
|
203
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
204
|
|
205
|
try{
|
206
|
Set<String> fieldObservationIdSet = new HashSet<String>();
|
207
|
Set<String> termsIdSet = new HashSet<String>();
|
208
|
Set<String> collectionIdSet = new HashSet<String>();
|
209
|
|
210
|
while (rs.next()){
|
211
|
// handleForeignKey(rs, fieldObservationIdSet, "DuplicateFk");
|
212
|
// handleForeignKey(rs, termsIdSet, "LifeFormFk");
|
213
|
// handleForeignKey(rs, collectionIdSet, "CollectionFk");
|
214
|
}
|
215
|
|
216
|
//field observation map for duplicates
|
217
|
nameSpace = AlgaTerraDnaImport.ECO_FACT_FIELD_OBSERVATION_NAMESPACE;
|
218
|
cdmClass = FieldObservation.class;
|
219
|
idSet = fieldObservationIdSet;
|
220
|
Map<String, FieldObservation> fieldObservationMap = (Map<String, FieldObservation>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
221
|
result.put(nameSpace, fieldObservationMap);
|
222
|
|
223
|
//collections
|
224
|
nameSpace = AlgaTerraCollectionImport.NAMESPACE_COLLECTION;
|
225
|
cdmClass = Collection.class;
|
226
|
idSet = collectionIdSet;
|
227
|
Map<String, Collection> collectionMap = (Map<String, Collection>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
228
|
result.put(nameSpace, collectionMap);
|
229
|
|
230
|
//sub-collections
|
231
|
nameSpace = AlgaTerraCollectionImport.NAMESPACE_SUBCOLLECTION;
|
232
|
cdmClass = Collection.class;
|
233
|
idSet = collectionIdSet;
|
234
|
Map<String, Collection> subCollectionMap = (Map<String, Collection>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
235
|
result.put(nameSpace, subCollectionMap);
|
236
|
|
237
|
//terms
|
238
|
nameSpace = AlgaTerraDnaImport.TERMS_NAMESPACE;
|
239
|
cdmClass = FieldObservation.class;
|
240
|
idSet = termsIdSet;
|
241
|
Map<String, DefinedTermBase> termMap = (Map<String, DefinedTermBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
242
|
result.put(nameSpace, termMap);
|
243
|
|
244
|
} catch (SQLException e) {
|
245
|
throw new RuntimeException(e);
|
246
|
}
|
247
|
return result;
|
248
|
}
|
249
|
|
250
|
|
251
|
|
252
|
/* (non-Javadoc)
|
253
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
|
254
|
*/
|
255
|
@Override
|
256
|
protected boolean doCheck(BerlinModelImportState state){
|
257
|
IOValidator<BerlinModelImportState> validator = new AlgaTerraDnaImportValidator();
|
258
|
return validator.validate(state);
|
259
|
}
|
260
|
|
261
|
|
262
|
/* (non-Javadoc)
|
263
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
264
|
*/
|
265
|
protected boolean isIgnore(BerlinModelImportState state){
|
266
|
return ! ((AlgaTerraImportState)state).getAlgaTerraConfigurator().isDoDna();
|
267
|
}
|
268
|
|
269
|
}
|