1
|
/**
|
2
|
* Copyright (C) 2015 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.edaphobase;
|
10
|
|
11
|
import java.sql.ResultSet;
|
12
|
import java.sql.SQLException;
|
13
|
import java.util.HashMap;
|
14
|
import java.util.HashSet;
|
15
|
import java.util.Map;
|
16
|
import java.util.Set;
|
17
|
import java.util.regex.Matcher;
|
18
|
import java.util.regex.Pattern;
|
19
|
|
20
|
import org.apache.commons.lang.StringUtils;
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
|
26
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
27
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
28
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
29
|
import eu.etaxonomy.cdm.model.common.Language;
|
30
|
import eu.etaxonomy.cdm.model.description.Feature;
|
31
|
import eu.etaxonomy.cdm.model.description.State;
|
32
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
33
|
import eu.etaxonomy.cdm.model.term.Representation;
|
34
|
import eu.etaxonomy.cdm.model.term.TermType;
|
35
|
import eu.etaxonomy.cdm.model.term.TermVocabulary;
|
36
|
|
37
|
/**
|
38
|
* @author a.mueller
|
39
|
* @since 18.12.2015
|
40
|
*
|
41
|
*/
|
42
|
@Component
|
43
|
public class EdaphobaseDescriptionImport extends EdaphobaseImportBase {
|
44
|
private static final long serialVersionUID = -9138378836474086070L;
|
45
|
private static final Logger logger = Logger.getLogger(EdaphobaseDescriptionImport.class);
|
46
|
|
47
|
private static final String tableName = "description_detail";
|
48
|
|
49
|
private static final String pluralString = "descriptions";
|
50
|
|
51
|
|
52
|
/**
|
53
|
* @param tableName
|
54
|
* @param pluralString
|
55
|
*/
|
56
|
public EdaphobaseDescriptionImport() {
|
57
|
super(tableName, pluralString);
|
58
|
}
|
59
|
|
60
|
@Override
|
61
|
protected String getIdQuery(EdaphobaseImportState state) {
|
62
|
|
63
|
return " SELECT dd.description_detail_id " +
|
64
|
" FROM observation.observation o " +
|
65
|
" JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
|
66
|
" JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
|
67
|
" JOIN observation.column c ON dd.column_fk = c.column_id " +
|
68
|
" LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
|
69
|
" WHERE o.first_description = true and o.taxon_fk is not null " +
|
70
|
" ORDER BY dd.description_detail_id, o.taxon_fk";
|
71
|
}
|
72
|
|
73
|
@Override
|
74
|
protected String getRecordQuery(EdaphobaseImportConfigurator config) {
|
75
|
String result = " SELECT DISTINCT dd.description_detail_id, o.taxon_fk, c.name_de, dd.min_value, "
|
76
|
+ " dd.max_value, dd.value, u.name, dd.text, dd.list_element_fk " +
|
77
|
" FROM observation.observation o " +
|
78
|
" JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
|
79
|
" JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
|
80
|
" JOIN observation.column c ON dd.column_fk = c.column_id " +
|
81
|
" LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
|
82
|
" WHERE o.first_description = true and o.taxon_fk is not null " +
|
83
|
" AND dd.description_detail_id IN (@IDSET)";
|
84
|
|
85
|
result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
|
86
|
return result;
|
87
|
}
|
88
|
|
89
|
@Override
|
90
|
protected void doInvoke(EdaphobaseImportState state) {
|
91
|
doFeatures(state);
|
92
|
super.doInvoke(state);
|
93
|
}
|
94
|
|
95
|
|
96
|
/**
|
97
|
*
|
98
|
*/
|
99
|
private void doFeatures(EdaphobaseImportState state) {
|
100
|
String sql = " SELECT c.*, l.list_id, l.is_hierarchical listIsHierarchical, l.name listName, l.attribute_order " +
|
101
|
" FROM observation.column c LEFT OUTER JOIN selective_list.list l ON c.list_fk = l.list_id " +
|
102
|
" WHERE c.column_id IN " +
|
103
|
" (SELECT dd.column_fk " +
|
104
|
" FROM observation.observation o " +
|
105
|
" JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
|
106
|
" JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
|
107
|
" WHERE o.first_description = true and o.taxon_fk is not null"
|
108
|
+ ")";
|
109
|
ResultSet rs = state.getConfig().getSource().getResultSet(sql);
|
110
|
try {
|
111
|
@SuppressWarnings("unchecked")
|
112
|
TermVocabulary<Feature> vocQuant = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase quantitative features", "Quantitative features", "quant.", null);
|
113
|
vocQuant.setUuid(EdaphobaseImportTransformer.uuidVocFeatureQuantitative);
|
114
|
@SuppressWarnings("unchecked")
|
115
|
TermVocabulary<Feature> vocBiology = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase biological features", "Biological features", "biol.", null);
|
116
|
vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureBiological);
|
117
|
@SuppressWarnings("unchecked")
|
118
|
TermVocabulary<Feature> vocMorphology = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase morphological features", "Morphological features", "morph.", null);
|
119
|
vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureMorpho);
|
120
|
|
121
|
while (rs.next()){
|
122
|
handleSingleFeature(state, rs, vocQuant, vocBiology, vocMorphology);
|
123
|
}
|
124
|
getVocabularyService().save(vocQuant);
|
125
|
getVocabularyService().save(vocBiology);
|
126
|
getVocabularyService().save(vocMorphology);
|
127
|
} catch (SQLException e) {
|
128
|
e.printStackTrace();
|
129
|
}
|
130
|
}
|
131
|
|
132
|
/**
|
133
|
* @param state
|
134
|
* @param rs
|
135
|
* @param vocMorphology
|
136
|
* @param vocBiology
|
137
|
* @param vocQuant
|
138
|
* @throws SQLException
|
139
|
*/
|
140
|
private void handleSingleFeature(EdaphobaseImportState state, ResultSet rs,
|
141
|
TermVocabulary<Feature> vocQuant, TermVocabulary<Feature> vocBiology,
|
142
|
TermVocabulary<Feature> vocMorphology) throws SQLException {
|
143
|
int id = rs.getInt("column_id");
|
144
|
int minVal = rs.getInt("min_value");
|
145
|
int maxVal = rs.getInt("max_value"); //null
|
146
|
String nameDe = rs.getString("name_de");
|
147
|
String nameEn = rs.getString("name_en");
|
148
|
//required
|
149
|
Integer unit_fk = nullSafeInt(rs, "unit_fk"); //18=mm; 47=mg
|
150
|
int dataType = rs.getInt("data_type_fk"); //11475=Integer, 11476=Floating point, 11477=String, 11478=Categorical data
|
151
|
Integer list = nullSafeInt(rs, "list_fk"); //allows null, distinct values except for null
|
152
|
int columnGroup = rs.getInt("column_group"); //11661=Quantität, 11663=Biology, 11664=Morphology
|
153
|
String description = rs.getString("description");
|
154
|
String description_en = rs.getString("description_en");
|
155
|
//versionfield
|
156
|
checkNullStr(state, rs, id, "regEx", "tableName", "scheme_name","query","pg_preview_query");
|
157
|
//anonym (f, but once t)
|
158
|
//only once (f, sometimes t)
|
159
|
//inner_agg
|
160
|
//inner_summary
|
161
|
//hierarchical_level_fk //empty int
|
162
|
//usings (json), some values
|
163
|
|
164
|
String listName = rs.getString("listName");
|
165
|
|
166
|
Feature feature = Feature.NewInstance(description_en, nameEn, null);
|
167
|
feature.addRepresentation(Representation.NewInstance(description, nameDe, null, Language.GERMAN()));
|
168
|
|
169
|
//columnGroup => vocabulary
|
170
|
if (columnGroup == 11661){
|
171
|
vocQuant.addTerm(feature);
|
172
|
}else if (columnGroup == 11663){
|
173
|
vocBiology.addTerm(feature);
|
174
|
}else if (columnGroup == 11664){
|
175
|
vocMorphology.addTerm(feature);
|
176
|
}else {
|
177
|
logger.error("Unhandled column group "+ columnGroup);
|
178
|
}
|
179
|
|
180
|
//dataType
|
181
|
feature.setSupportsTextData(false);
|
182
|
if (dataType == 11475 || dataType == 11476){
|
183
|
feature.setSupportsQuantitativeData(true);
|
184
|
}else if (dataType == 11477){
|
185
|
feature.setSupportsTextData(true);
|
186
|
}else if (dataType == 11478){
|
187
|
feature.setSupportsCategoricalData(true);
|
188
|
}else{
|
189
|
logger.error("Unhandled dataType " + dataType);
|
190
|
}
|
191
|
|
192
|
//list
|
193
|
if (list != null){
|
194
|
//TODO term type
|
195
|
TermVocabulary<State> categoryVoc = makeCategoricalVocabulary(state, list, listName);
|
196
|
feature.addSupportedCategoricalEnumeration(categoryVoc);
|
197
|
}
|
198
|
|
199
|
//unit_fk
|
200
|
//TODO
|
201
|
|
202
|
}
|
203
|
|
204
|
|
205
|
/**
|
206
|
* @param state
|
207
|
* @param list
|
208
|
* @param listName
|
209
|
* @return
|
210
|
* @throws SQLException
|
211
|
*/
|
212
|
private TermVocabulary<State> makeCategoricalVocabulary(EdaphobaseImportState state, Integer list, String listName) throws SQLException {
|
213
|
TermVocabulary<State> result = TermVocabulary.NewInstance(TermType.State, listName, listName, null, null );
|
214
|
String sql = " SELECT * "
|
215
|
+ " FROM selective_list.element "
|
216
|
+ " WHERE list_fk = " + list
|
217
|
+ " ORDER BY path ";
|
218
|
ResultSet rs = state.getConfig().getSource().getResultSet(sql);
|
219
|
Map<Integer, State> map = new HashMap<>();
|
220
|
|
221
|
while (rs.next()){
|
222
|
Integer id = rs.getInt("element_id");
|
223
|
Integer parentFk = nullSafeInt(rs, "element_id");
|
224
|
String value = rs.getString("value_summary");
|
225
|
|
226
|
State term = State.NewInstance();
|
227
|
handleValueSummary(state, term, value, list, id);
|
228
|
if (parentFk != null && map.get(parentFk) != null){
|
229
|
State parent = map.get(parentFk);
|
230
|
term.setKindOf(term);
|
231
|
}
|
232
|
result.addTerm(term);
|
233
|
}
|
234
|
return result;
|
235
|
}
|
236
|
|
237
|
/**
|
238
|
* @param state
|
239
|
* @param term
|
240
|
* @param value
|
241
|
* @param list
|
242
|
*/
|
243
|
private void handleValueSummary(EdaphobaseImportState state, State term, String valueOrig, Integer list, Integer id) {
|
244
|
String sep = ", ";
|
245
|
String value = valueOrig;
|
246
|
String[] splits = value.split(sep);
|
247
|
String german;
|
248
|
String english;
|
249
|
String idInVoc = null;
|
250
|
//idInVoc
|
251
|
if (splits[0].length() <=2){
|
252
|
idInVoc = splits[0];
|
253
|
String[] newSplit = new String[splits.length - 1];
|
254
|
for (int i = 0; i < newSplit.length; i++){
|
255
|
newSplit[i] = splits[i+1];
|
256
|
}
|
257
|
splits = newSplit;
|
258
|
value = value.substring(idInVoc.length() + 1);
|
259
|
}else if (list == 27){
|
260
|
Matcher matcher = Pattern.compile("(.+)(\\(d[a-h]?\\))").matcher(splits[1]);
|
261
|
if (matcher.matches()){
|
262
|
splits[1] = matcher.group(1).trim();
|
263
|
idInVoc = matcher.group(2).replace("(", "").replace(")", "").trim();
|
264
|
}
|
265
|
}
|
266
|
if(value.contains(", indicator ")){
|
267
|
splits = value.split(", indicator ");
|
268
|
splits[1] = "indicator " + splits[1];
|
269
|
}
|
270
|
//
|
271
|
if (splits.length == 2){
|
272
|
german = splits[0];
|
273
|
english = splits[1];
|
274
|
}else if (splits.length == 4){
|
275
|
german = CdmUtils.concat(sep, splits[0], splits[1]);
|
276
|
english = CdmUtils.concat(sep, splits[2], splits[3]);
|
277
|
}else if (splits.length == 6){
|
278
|
german = CdmUtils.concat(sep, splits[0], splits[1], splits[2]);
|
279
|
english = CdmUtils.concat(sep, splits[3], splits[4], splits[5]);
|
280
|
}else{
|
281
|
logger.warn("Pattern for id = " + id + " could not be recognized: " + valueOrig);
|
282
|
}
|
283
|
//TODO 84, 95 (mit Einheit)
|
284
|
|
285
|
term.addRepresentation(Representation.NewInstance(german, german, null, Language.GERMAN()));
|
286
|
term.addRepresentation(Representation.NewInstance(english, english, null, Language.ENGLISH()));
|
287
|
term.setIdInVocabulary(idInVoc);
|
288
|
}
|
289
|
|
290
|
/**
|
291
|
* Checks if the value for these attributes is blank
|
292
|
* @param state
|
293
|
* @param rs
|
294
|
* @param strings
|
295
|
* @throws SQLException
|
296
|
*/
|
297
|
private void checkNullStr(EdaphobaseImportState state, ResultSet rs, int id, String ... cols) throws SQLException {
|
298
|
for (String col:cols){
|
299
|
String val = rs.getString(col);
|
300
|
if (StringUtils.isNotBlank(val)){
|
301
|
logger.warn("Column " + col + " is not empty as expected for observation.column.column_id = " + id);
|
302
|
}
|
303
|
}
|
304
|
}
|
305
|
|
306
|
@Override
|
307
|
public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
|
308
|
ResultSet rs = partitioner.getResultSet();
|
309
|
@SuppressWarnings("rawtypes")
|
310
|
Set<TaxonBase> taxaToSave = new HashSet<>();
|
311
|
try {
|
312
|
while (rs.next()){
|
313
|
makeSingleTaxon(state, rs, taxaToSave);
|
314
|
}
|
315
|
} catch (SQLException | UndefinedTransformerMethodException e) {
|
316
|
e.printStackTrace();
|
317
|
}
|
318
|
|
319
|
getTaxonService().saveOrUpdate(taxaToSave);
|
320
|
return true;
|
321
|
}
|
322
|
|
323
|
/**
|
324
|
* @param state
|
325
|
* @param rs
|
326
|
* @param taxaToSave
|
327
|
* @throws SQLException
|
328
|
* @throws UndefinedTransformerMethodException
|
329
|
*/
|
330
|
private void makeSingleTaxon(EdaphobaseImportState state, ResultSet rs, Set<TaxonBase> taxaToSave)
|
331
|
throws SQLException, UndefinedTransformerMethodException {
|
332
|
Integer id = nullSafeInt(rs, "description_detail_id");
|
333
|
Integer taxonFk = nullSafeInt(rs, "taxon_fk");
|
334
|
TaxonBase<?> taxonBase = state.getRelatedObject(TAXON_NAMESPACE, String.valueOf(taxonFk), TaxonBase.class);
|
335
|
|
336
|
|
337
|
// //id
|
338
|
// ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
|
339
|
// ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
|
340
|
// handleExampleIdentifiers(taxonBase, id);
|
341
|
}
|
342
|
|
343
|
|
344
|
@Override
|
345
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
|
346
|
EdaphobaseImportState state) {
|
347
|
|
348
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
|
349
|
|
350
|
Set<String> taxonIdSet = new HashSet<>();
|
351
|
|
352
|
try {
|
353
|
while (rs.next()){
|
354
|
handleForeignKey(rs, taxonIdSet, "taxon_fk");
|
355
|
}
|
356
|
} catch (SQLException e) {
|
357
|
e.printStackTrace();
|
358
|
}
|
359
|
|
360
|
//reference map
|
361
|
String nameSpace = TAXON_NAMESPACE;
|
362
|
Class<?> cdmClass = TaxonBase.class;
|
363
|
Set<String> idSet = taxonIdSet;
|
364
|
Map<String, TaxonBase<?>> taxonMap = (Map<String, TaxonBase<?>>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
365
|
result.put(nameSpace, taxonMap);
|
366
|
|
367
|
return result;
|
368
|
}
|
369
|
|
370
|
|
371
|
@Override
|
372
|
protected boolean doCheck(EdaphobaseImportState state) {
|
373
|
return true;
|
374
|
}
|
375
|
|
376
|
@Override
|
377
|
protected boolean isIgnore(EdaphobaseImportState state) {
|
378
|
return ! state.getConfig().isDoDescriptions();
|
379
|
}
|
380
|
|
381
|
}
|