Project

General

Profile

Download (14.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2015 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.edaphobase;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.Map;
16
import java.util.Set;
17
import java.util.regex.Matcher;
18
import java.util.regex.Pattern;
19

    
20
import org.apache.commons.lang.StringUtils;
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.IPartitionedIO;
26
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.description.Feature;
30
import eu.etaxonomy.cdm.model.description.State;
31
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
32
import eu.etaxonomy.cdm.model.term.Representation;
33
import eu.etaxonomy.cdm.model.term.TermType;
34
import eu.etaxonomy.cdm.model.term.TermVocabulary;
35

    
36
/**
37
 * @author a.mueller
38
 * @since 18.12.2015
39
 *
40
 */
41
@Component
42
public class EdaphobaseDescriptionImport extends EdaphobaseImportBase {
43
    private static final long serialVersionUID = -9138378836474086070L;
44
    private static final Logger logger = Logger.getLogger(EdaphobaseDescriptionImport.class);
45

    
46
    private static final String tableName = "description_detail";
47

    
48
    private static final String pluralString = "descriptions";
49

    
50

    
51
    /**
52
     * @param tableName
53
     * @param pluralString
54
     */
55
    public EdaphobaseDescriptionImport() {
56
        super(tableName, pluralString);
57
    }
58

    
59
    @Override
60
    protected String getIdQuery(EdaphobaseImportState state) {
61

    
62
        return " SELECT dd.description_detail_id " +
63
            " FROM observation.observation o " +
64
            " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
65
            " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
66
            " JOIN observation.column c ON dd.column_fk = c.column_id " +
67
            " LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
68
            " WHERE o.first_description = true and o.taxon_fk is not null " +
69
            " ORDER BY dd.description_detail_id, o.taxon_fk";
70
    }
71

    
72
    @Override
73
    protected String getRecordQuery(EdaphobaseImportConfigurator config) {
74
        String result = " SELECT DISTINCT dd.description_detail_id, o.taxon_fk, c.name_de, dd.min_value, "
75
                + " dd.max_value, dd.value, u.name, dd.text, dd.list_element_fk " +
76
                " FROM observation.observation o " +
77
                    " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
78
                    " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
79
                    " JOIN observation.column c ON dd.column_fk = c.column_id " +
80
                    " LEFT JOIN observation.unit u ON dd.unit_fk = u.unit_id " +
81
                " WHERE o.first_description = true and o.taxon_fk is not null " +
82
                  " AND dd.description_detail_id IN (@IDSET)";
83

    
84
        result = result.replace("@IDSET", IPartitionedIO.ID_LIST_TOKEN);
85
        return result;
86
    }
87

    
88
    @Override
89
    protected void doInvoke(EdaphobaseImportState state) {
90
        doFeatures(state);
91
        super.doInvoke(state);
92
    }
93

    
94

    
95
    /**
96
     *
97
     */
98
    private void doFeatures(EdaphobaseImportState state) {
99
        String sql = " SELECT  c.*, l.list_id, l.is_hierarchical listIsHierarchical, l.name listName, l.attribute_order " +
100
                " FROM observation.column c LEFT OUTER JOIN selective_list.list l ON c.list_fk = l.list_id " +
101
                " WHERE c.column_id IN " +
102
                   " (SELECT dd.column_fk " +
103
                       " FROM observation.observation o " +
104
                       " JOIN observation.observation2description o2d ON o.observation_id = o2d.observation_fk " +
105
                       " JOIN observation.description_detail dd ON o2d.description_fk = dd.description_fk " +
106
                       " WHERE o.first_description = true and o.taxon_fk is not null"
107
                   + ")";
108
        ResultSet rs = state.getConfig().getSource().getResultSet(sql);
109
        try {
110
            TermVocabulary<Feature> vocQuant = TermVocabulary.NewInstance(TermType.Feature, Feature.class,
111
                    "Edaphobase quantitative features", "Quantitative features", "quant.", null);
112
            vocQuant.setUuid(EdaphobaseImportTransformer.uuidVocFeatureQuantitative);
113
            TermVocabulary<Feature> vocBiology = TermVocabulary.NewInstance(TermType.Feature, Feature.class, "Edaphobase biological features", "Biological features", "biol.", null);
114
            vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureBiological);
115
            TermVocabulary<Feature> vocMorphology = TermVocabulary.NewInstance(TermType.Feature, Feature.class, "Edaphobase morphological features", "Morphological features", "morph.", null);
116
            vocBiology.setUuid(EdaphobaseImportTransformer.uuidVocFeatureMorpho);
117

    
118
            while (rs.next()){
119
               handleSingleFeature(state, rs, vocQuant, vocBiology, vocMorphology);
120
            }
121
            getVocabularyService().save(vocQuant);
122
            getVocabularyService().save(vocBiology);
123
            getVocabularyService().save(vocMorphology);
124
        } catch (SQLException e) {
125
            e.printStackTrace();
126
        }
127
    }
128

    
129
    /**
130
     * @param state
131
     * @param rs
132
     * @param vocMorphology
133
     * @param vocBiology
134
     * @param vocQuant
135
     * @throws SQLException
136
     */
137
    private void handleSingleFeature(EdaphobaseImportState state, ResultSet rs,
138
            TermVocabulary<Feature> vocQuant, TermVocabulary<Feature> vocBiology,
139
            TermVocabulary<Feature> vocMorphology) throws SQLException {
140
        int id = rs.getInt("column_id");
141
        int minVal = rs.getInt("min_value");
142
        int maxVal = rs.getInt("max_value");  //null
143
        String nameDe = rs.getString("name_de");
144
        String nameEn = rs.getString("name_en");
145
        //required
146
        Integer unit_fk = nullSafeInt(rs, "unit_fk"); //18=mm; 47=mg
147
        int dataType = rs.getInt("data_type_fk");  //11475=Integer, 11476=Floating point, 11477=String, 11478=Categorical data
148
        Integer list = nullSafeInt(rs, "list_fk"); //allows null,  distinct values except for null
149
        int columnGroup = rs.getInt("column_group"); //11661=Quantität, 11663=Biology, 11664=Morphology
150
        String description = rs.getString("description");
151
        String description_en = rs.getString("description_en");
152
        //versionfield
153
        checkNullStr(state, rs, id, "regEx", "tableName", "scheme_name","query","pg_preview_query");
154
        //anonym (f, but once t)
155
        //only once (f, sometimes t)
156
        //inner_agg
157
        //inner_summary
158
        //hierarchical_level_fk //empty int
159
        //usings (json), some values
160

    
161
        String listName = rs.getString("listName");
162

    
163
        Feature feature = Feature.NewInstance(description_en, nameEn, null);
164
        feature.addRepresentation(Representation.NewInstance(description, nameDe, null, Language.GERMAN()));
165

    
166
        //columnGroup => vocabulary
167
        if (columnGroup == 11661){
168
           vocQuant.addTerm(feature);
169
        }else if (columnGroup == 11663){
170
            vocBiology.addTerm(feature);
171
         }else if (columnGroup == 11664){
172
             vocMorphology.addTerm(feature);
173
         }else {
174
             logger.error("Unhandled column group "+ columnGroup);
175
         }
176

    
177
        //dataType
178
        feature.setSupportsTextData(false);
179
        if (dataType == 11475 || dataType == 11476){
180
            feature.setSupportsQuantitativeData(true);
181
        }else if (dataType == 11477){
182
            feature.setSupportsTextData(true);
183
        }else if (dataType == 11478){
184
            feature.setSupportsCategoricalData(true);
185
        }else{
186
            logger.error("Unhandled dataType " +  dataType);
187
        }
188

    
189
        //list
190
        if (list != null){
191
            //TODO term type
192
            TermVocabulary<State> categoryVoc = makeCategoricalVocabulary(state, list, listName);
193
            feature.addSupportedCategoricalEnumeration(categoryVoc);
194
        }
195

    
196
        //unit_fk
197
        //TODO
198

    
199
    }
200

    
201

    
202
    /**
203
     * @param state
204
     * @param list
205
     * @param listName
206
     * @return
207
     * @throws SQLException
208
     */
209
    private TermVocabulary<State> makeCategoricalVocabulary(EdaphobaseImportState state, Integer list, String listName) throws SQLException {
210
        TermVocabulary<State> result = TermVocabulary.NewInstance(TermType.State,
211
                State.class, listName, listName, null, null );
212
        String sql = " SELECT * "
213
                + " FROM selective_list.element "
214
                + " WHERE list_fk = " + list
215
                + " ORDER BY path ";
216
        ResultSet rs = state.getConfig().getSource().getResultSet(sql);
217
        Map<Integer, State> map = new HashMap<>();
218

    
219
        while (rs.next()){
220
            Integer id = rs.getInt("element_id");
221
            Integer parentFk = nullSafeInt(rs, "element_id");
222
            String value = rs.getString("value_summary");
223

    
224
            State term = State.NewInstance();
225
            handleValueSummary(state, term, value, list, id);
226
            if (parentFk != null && map.get(parentFk) != null){
227
                State parent = map.get(parentFk);
228
                term.setKindOf(term);
229
            }
230
            result.addTerm(term);
231
        }
232
        return result;
233
    }
234

    
235
    /**
236
     * @param state
237
     * @param term
238
     * @param value
239
     * @param list
240
     */
241
    private void handleValueSummary(EdaphobaseImportState state, State term, String valueOrig, Integer list, Integer id) {
242
        String sep = ", ";
243
        String value = valueOrig;
244
        String[] splits = value.split(sep);
245
        String german;
246
        String english;
247
        String idInVoc = null;
248
        //idInVoc
249
        if (splits[0].length() <=2){
250
            idInVoc = splits[0];
251
            String[] newSplit = new String[splits.length - 1];
252
            for (int i = 0; i < newSplit.length; i++){
253
                newSplit[i] = splits[i+1];
254
            }
255
            splits = newSplit;
256
            value = value.substring(idInVoc.length() + 1);
257
        }else if (list == 27){
258
            Matcher matcher = Pattern.compile("(.+)(\\(d[a-h]?\\))").matcher(splits[1]);
259
            if (matcher.matches()){
260
                splits[1] = matcher.group(1).trim();
261
                idInVoc = matcher.group(2).replace("(", "").replace(")", "").trim();
262
            }
263
        }
264
        if(value.contains(", indicator ")){
265
            splits = value.split(", indicator ");
266
            splits[1] = "indicator " + splits[1];
267
        }
268
        //
269
        if (splits.length == 2){
270
            german = splits[0];
271
            english = splits[1];
272
        }else if (splits.length == 4){
273
            german = CdmUtils.concat(sep, splits[0], splits[1]);
274
            english = CdmUtils.concat(sep, splits[2], splits[3]);
275
        }else if (splits.length == 6){
276
            german = CdmUtils.concat(sep, splits[0], splits[1], splits[2]);
277
            english = CdmUtils.concat(sep, splits[3], splits[4], splits[5]);
278
        }else{
279
            logger.warn("Pattern for id = " + id + " could not be recognized: " + valueOrig);
280
        }
281
        //TODO 84, 95  (mit Einheit)
282

    
283
        logger.error("addRepresentation has been commented because 'german' and 'english' not yet handled correctly");
284
//        term.addRepresentation(Representation.NewInstance(german, german, null, Language.GERMAN()));
285
//        term.addRepresentation(Representation.NewInstance(english, english, null, Language.ENGLISH()));
286
//        term.setIdInVocabulary(idInVoc);
287
    }
288

    
289
    /**
290
     * Checks if the value for these attributes is blank
291
     * @param state
292
     * @param rs
293
     * @param strings
294
     * @throws SQLException
295
     */
296
    private void checkNullStr(EdaphobaseImportState state, ResultSet rs, int id, String ... cols) throws SQLException {
297
        for (String col:cols){
298
            String val = rs.getString(col);
299
            if (StringUtils.isNotBlank(val)){
300
                logger.warn("Column " + col + " is not empty as expected for observation.column.column_id = " + id);
301
            }
302
        }
303
    }
304

    
305
    @Override
306
    public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, EdaphobaseImportState state) {
307
        ResultSet rs = partitioner.getResultSet();
308
        @SuppressWarnings("rawtypes")
309
        Set<TaxonBase> taxaToSave = new HashSet<>();
310
        try {
311
            while (rs.next()){
312
                makeSingleTaxon(state, rs, taxaToSave);
313
            }
314
        } catch (SQLException e) {
315
             e.printStackTrace();
316
        }
317

    
318
        getTaxonService().saveOrUpdate(taxaToSave);
319
        return true;
320
    }
321

    
322
    private void makeSingleTaxon(EdaphobaseImportState state, ResultSet rs, @SuppressWarnings("rawtypes") Set<TaxonBase> taxaToSave)
323
            throws SQLException {
324
        Integer id = nullSafeInt(rs, "description_detail_id");
325
        Integer taxonFk = nullSafeInt(rs, "taxon_fk");
326
        TaxonBase<?> taxonBase = state.getRelatedObject(TAXON_NAMESPACE, String.valueOf(taxonFk), TaxonBase.class);
327

    
328

    
329
//        //id
330
//        ImportHelper.setOriginalSource(taxonBase, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
331
//        ImportHelper.setOriginalSource(name, state.getTransactionalSourceReference(), id, TAXON_NAMESPACE);
332
//        handleExampleIdentifiers(taxonBase, id);
333
    }
334

    
335

    
336
    @Override
337
    public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs,
338
            EdaphobaseImportState state) {
339

    
340
        Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
341
        Set<String> taxonIdSet = new HashSet<>();
342

    
343
        try {
344
            while (rs.next()){
345
                handleForeignKey(rs, taxonIdSet, "taxon_fk");
346
            }
347
        } catch (SQLException e) {
348
            e.printStackTrace();
349
        }
350

    
351
        //reference map
352
        String nameSpace = TAXON_NAMESPACE;
353
        Set<String> idSet = taxonIdSet;
354
        @SuppressWarnings("rawtypes")
355
        Map<String, TaxonBase> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonBase.class, idSet, nameSpace);
356
        result.put(nameSpace, taxonMap);
357

    
358
        return result;
359
    }
360

    
361
    @Override
362
    protected boolean doCheck(EdaphobaseImportState state) {
363
        return true;
364
    }
365

    
366
    @Override
367
    protected boolean isIgnore(EdaphobaseImportState state) {
368
        return ! state.getConfig().isDoDescriptions();
369
    }
370
}
(3-3/12)