Project

General

Profile

Download (14.7 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2015 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.edaphobase;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.Map;
16
import java.util.Set;
17
import java.util.regex.Matcher;
18
import java.util.regex.Pattern;
19

    
20
import org.apache.commons.lang.StringUtils;
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.Language;
30
import eu.etaxonomy.cdm.model.common.Representation;
31
import eu.etaxonomy.cdm.model.common.TermType;
32
import eu.etaxonomy.cdm.model.common.TermVocabulary;
33
import eu.etaxonomy.cdm.model.description.Feature;
34
import eu.etaxonomy.cdm.model.description.State;
35
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
36

    
37
/**
38
 * @author a.mueller
39
 * @since 18.12.2015
40
 *
41
 */
42
@Component
43
public class EdaphobaseDescriptionImport extends EdaphobaseImportBase {
44
    private static final long serialVersionUID = -9138378836474086070L;
45
    private static final Logger logger = Logger.getLogger(EdaphobaseDescriptionImport.class);
46

    
47
    private static final String tableName = "description_detail";
48

    
49
    private static final String pluralString = "descriptions";
50

    
51

    
52
    /**
53
     * @param tableName
54
     * @param pluralString
55
     */
56
    public EdaphobaseDescriptionImport() {
57
        super(tableName, pluralString);
58
    }
59

    
60
    @Override
61
    protected String getIdQuery(EdaphobaseImportState state) {
62

    
63
        return " SELECT dd.description_detail_id " +
64
            " FROM observation.observation o " +
65
            " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
66
            " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
67
            " JOIN observation.column c ON dd.column_fk = c.column_id " +
68
            " LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
69
            " WHERE o.first_description = true and o.taxon_fk is not null " +
70
            " ORDER BY dd.description_detail_id, o.taxon_fk";
71
    }
72

    
73
    @Override
74
    protected String getRecordQuery(EdaphobaseImportConfigurator config) {
75
        String result = " SELECT DISTINCT dd.description_detail_id, o.taxon_fk, c.name_de, dd.min_value, "
76
                + " dd.max_value, dd.value, u.name, dd.text, dd.list_element_fk " +
77
                " FROM observation.observation o " +
78
                    " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
79
                    " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
80
                    " JOIN observation.column c ON dd.column_fk = c.column_id " +
81
                    " LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
82
                " WHERE o.first_description = true and o.taxon_fk is not null " +
83
                  " AND dd.description_detail_id IN (@IDSET)";
84

    
85
        result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
86
        return result;
87
    }
88

    
89
    @Override
90
    protected void doInvoke(EdaphobaseImportState state) {
91
        doFeatures(state);
92
        super.doInvoke(state);
93
    }
94

    
95

    
96
    /**
97
     *
98
     */
99
    private void doFeatures(EdaphobaseImportState state) {
100
        String sql = " SELECT  c.*, l.list_id, l.is_hierarchical listIsHierarchical, l.name listName, l.attribute_order " +
101
                " FROM observation.column c LEFT OUTER JOIN selective_list.list l ON c.list_fk = l.list_id " +
102
                " WHERE c.column_id IN " +
103
                   " (SELECT dd.column_fk " +
104
                       " FROM observation.observation o " +
105
                       " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
106
                       " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
107
                       " WHERE o.first_description = true and o.taxon_fk is not null"
108
                   + ")";
109
        ResultSet rs = state.getConfig().getSource().getResultSet(sql);
110
        try {
111
            @SuppressWarnings("unchecked")
112
            TermVocabulary<Feature> vocQuant = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase quantitative features", "Quantitative features", "quant.", null);
113
            vocQuant.setUuid(EdaphobaseImportTransformer.uuidVocFeatureQuantitative);
114
            @SuppressWarnings("unchecked")
115
            TermVocabulary<Feature> vocBiology = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase biological features", "Biological features", "biol.", null);
116
            vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureBiological);
117
            @SuppressWarnings("unchecked")
118
            TermVocabulary<Feature> vocMorphology = TermVocabulary.NewInstance(TermType.Feature, "Edaphobase morphological features", "Morphological features", "morph.", null);
119
            vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureMorpho);
120

    
121
            while (rs.next()){
122
               handleSingleFeature(state, rs, vocQuant, vocBiology, vocMorphology);
123
            }
124
            getVocabularyService().save(vocQuant);
125
            getVocabularyService().save(vocBiology);
126
            getVocabularyService().save(vocMorphology);
127
        } catch (SQLException e) {
128
            e.printStackTrace();
129
        }
130
    }
131

    
132
    /**
133
     * @param state
134
     * @param rs
135
     * @param vocMorphology
136
     * @param vocBiology
137
     * @param vocQuant
138
     * @throws SQLException
139
     */
140
    private void handleSingleFeature(EdaphobaseImportState state, ResultSet rs,
141
            TermVocabulary<Feature> vocQuant, TermVocabulary<Feature> vocBiology,
142
            TermVocabulary<Feature> vocMorphology) throws SQLException {
143
        int id = rs.getInt("column_id");
144
        int minVal = rs.getInt("min_value");
145
        int maxVal = rs.getInt("max_value");  //null
146
        String nameDe = rs.getString("name_de");
147
        String nameEn = rs.getString("name_en");
148
        //required
149
        Integer unit_fk = nullSafeInt(rs, "unit_fk"); //18=mm; 47=mg
150
        int dataType = rs.getInt("data_type_fk");  //11475=Integer, 11476=Floating point, 11477=String, 11478=Categorical data
151
        Integer list = nullSafeInt(rs, "list_fk"); //allows null,  distinct values except for null
152
        int columnGroup = rs.getInt("column_group"); //11661=Quantität, 11663=Biology, 11664=Morphology
153
        String description = rs.getString("description");
154
        String description_en = rs.getString("description_en");
155
        //versionfield
156
        checkNullStr(state, rs, id, "regEx", "tableName", "scheme_name","query","pg_preview_query");
157
        //anonym (f, but once t)
158
        //only once (f, sometimes t)
159
        //inner_agg
160
        //inner_summary
161
        //hierarchical_level_fk //empty int
162
        //usings (json), some values
163

    
164
        String listName = rs.getString("listName");
165

    
166
        Feature feature = Feature.NewInstance(description_en, nameEn, null);
167
        feature.addRepresentation(Representation.NewInstance(description, nameDe, null, Language.GERMAN()));
168

    
169
        //columnGroup => vocabulary
170
        if (columnGroup == 11661){
171
           vocQuant.addTerm(feature);
172
        }else if (columnGroup == 11663){
173
            vocBiology.addTerm(feature);
174
         }else if (columnGroup == 11664){
175
             vocMorphology.addTerm(feature);
176
         }else {
177
             logger.error("Unhandled column group "+ columnGroup);
178
         }
179

    
180
        //dataType
181
        feature.setSupportsTextData(false);
182
        if (dataType == 11475 || dataType == 11476){
183
            feature.setSupportsQuantitativeData(true);
184
        }else if (dataType == 11477){
185
            feature.setSupportsTextData(true);
186
        }else if (dataType == 11478){
187
            feature.setSupportsCategoricalData(true);
188
        }else{
189
            logger.error("Unhandled dataType " +  dataType);
190
        }
191

    
192
        //list
193
        if (list != null){
194
            //TODO term type
195
            TermVocabulary<State> categoryVoc = makeCategoricalVocabulary(state, list, listName);
196
            feature.addSupportedCategoricalEnumeration(categoryVoc);
197
        }
198

    
199
        //unit_fk
200
        //TODO
201

    
202
    }
203

    
204

    
205
    /**
206
     * @param state
207
     * @param list
208
     * @param listName
209
     * @return
210
     * @throws SQLException
211
     */
212
    private TermVocabulary<State> makeCategoricalVocabulary(EdaphobaseImportState state, Integer list, String listName) throws SQLException {
213
        TermVocabulary<State> result = TermVocabulary.NewInstance(TermType.State, listName, listName, null, null );
214
        String sql = " SELECT * "
215
                + " FROM selective_list.element "
216
                + " WHERE list_fk = " + list
217
                + " ORDER BY path ";
218
        ResultSet rs = state.getConfig().getSource().getResultSet(sql);
219
        Map<Integer, State> map = new HashMap<>();
220

    
221
        while (rs.next()){
222
            Integer id = rs.getInt("element_id");
223
            Integer parentFk = nullSafeInt(rs, "element_id");
224
            String value = rs.getString("value_summary");
225

    
226
            State term = State.NewInstance();
227
            handleValueSummary(state, term, value, list, id);
228
            if (parentFk != null && map.get(parentFk) != null){
229
                State parent = map.get(parentFk);
230
                term.setKindOf(term);
231
            }
232
            result.addTerm(term);
233
        }
234
        return result;
235
    }
236

    
237
    /**
238
     * @param state
239
     * @param term
240
     * @param value
241
     * @param list
242
     */
243
    private void handleValueSummary(EdaphobaseImportState state, State term, String valueOrig, Integer list, Integer id) {
244
        String sep = ", ";
245
        String value = valueOrig;
246
        String[] splits = value.split(sep);
247
        String german;
248
        String english;
249
        String idInVoc = null;
250
        //idInVoc
251
        if (splits[0].length() <=2){
252
            idInVoc = splits[0];
253
            String[] newSplit = new String[splits.length - 1];
254
            for (int i = 0; i < newSplit.length; i++){
255
                newSplit[i] = splits[i+1];
256
            }
257
            splits = newSplit;
258
            value = value.substring(idInVoc.length() + 1);
259
        }else if (list == 27){
260
            Matcher matcher = Pattern.compile("(.+)(\\(d[a-h]?\\))").matcher(splits[1]);
261
            if (matcher.matches()){
262
                splits[1] = matcher.group(1).trim();
263
                idInVoc = matcher.group(2).replace("(", "").replace(")", "").trim();
264
            }
265
        }
266
        if(value.contains(", indicator ")){
267
            splits = value.split(", indicator ");
268
            splits[1] = "indicator " + splits[1];
269
        }
270
        //
271
        if (splits.length == 2){
272
            german = splits[0];
273
            english = splits[1];
274
        }else if (splits.length == 4){
275
            german = CdmUtils.concat(sep, splits[0], splits[1]);
276
            english = CdmUtils.concat(sep, splits[2], splits[3]);
277
        }else if (splits.length == 6){
278
            german = CdmUtils.concat(sep, splits[0], splits[1], splits[2]);
279
            english = CdmUtils.concat(sep, splits[3], splits[4], splits[5]);
280
        }else{
281
            logger.warn("Pattern for id = " + id + " could not be recognized: " + valueOrig);
282
        }
283
        //TODO 84, 95  (mit Einheit)
284

    
285
        term.addRepresentation(Representation.NewInstance(german, german, null, Language.GERMAN()));
286
        term.addRepresentation(Representation.NewInstance(english, english, null, Language.ENGLISH()));
287
        term.setIdInVocabulary(idInVoc);
288
    }
289

    
290
    /**
291
     * Checks if the value for these attributes is blank
292
     * @param state
293
     * @param rs
294
     * @param strings
295
     * @throws SQLException
296
     */
297
    private void checkNullStr(EdaphobaseImportState state, ResultSet rs, int id, String ... cols) throws SQLException {
298
        for (String col:cols){
299
            String val = rs.getString(col);
300
            if (StringUtils.isNotBlank(val)){
301
                logger.warn("Column " + col + " is not empty as expected for observation.column.column_id = " + id);
302
            }
303
        }
304
    }
305

    
306
    @Override
307
    public boolean doPartition(ResultSetPartitioner partitioner, EdaphobaseImportState state) {
308
        ResultSet rs = partitioner.getResultSet();
309
        @SuppressWarnings("rawtypes")
310
        Set<TaxonBase> taxaToSave = new HashSet<>();
311
        try {
312
            while (rs.next()){
313
                makeSingleTaxon(state, rs, taxaToSave);
314
            }
315
        } catch (SQLException | UndefinedTransformerMethodException e) {
316
             e.printStackTrace();
317
        }
318

    
319
        getTaxonService().saveOrUpdate(taxaToSave);
320
        return true;
321
    }
322

    
323
    /**
324
     * @param state
325
     * @param rs
326
     * @param taxaToSave
327
     * @throws SQLException
328
     * @throws UndefinedTransformerMethodException
329
     */
330
    private void makeSingleTaxon(EdaphobaseImportState state, ResultSet rs, Set<TaxonBase> taxaToSave)
331
            throws SQLException, UndefinedTransformerMethodException {
332
        Integer id = nullSafeInt(rs, "description_detail_id");
333
        Integer taxonFk = nullSafeInt(rs, "taxon_fk");
334
        TaxonBase<?> taxonBase = state.getRelatedObject(TAXON_NAMESPACE, String.valueOf(taxonFk), TaxonBase.class);
335

    
336

    
337
//        //id
338
//        ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
339
//        ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
340
//        handleExampleIdentifiers(taxonBase, id);
341
    }
342

    
343

    
344
    @Override
345
    public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
346
            EdaphobaseImportState state) {
347

    
348
        Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
349

    
350
        Set<String> taxonIdSet = new HashSet<>();
351

    
352
        try {
353
            while (rs.next()){
354
                handleForeignKey(rs, taxonIdSet, "taxon_fk");
355
            }
356
        } catch (SQLException e) {
357
            e.printStackTrace();
358
        }
359

    
360
        //reference map
361
        String nameSpace = TAXON_NAMESPACE;
362
        Class<?> cdmClass = TaxonBase.class;
363
        Set<String> idSet = taxonIdSet;
364
        Map<String, TaxonBase<?>> taxonMap = (Map<String, TaxonBase<?>>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
365
        result.put(nameSpace, taxonMap);
366

    
367
        return result;
368
    }
369

    
370

    
371
    @Override
372
    protected boolean doCheck(EdaphobaseImportState state) {
373
        return true;
374
    }
375

    
376
    @Override
377
    protected boolean isIgnore(EdaphobaseImportState state) {
378
        return ! state.getConfig().isDoDescriptions();
379
    }
380

    
381
}
(3-3/12)