Project

General

Profile

Download (14.4 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.redlist.germanSL;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17

    
18
import org.apache.log4j.Logger;
19
import org.springframework.stereotype.Component;
20

    
21
import eu.etaxonomy.cdm.common.CdmUtils;
22
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
23
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
24
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
25
import eu.etaxonomy.cdm.model.common.CdmBase;
26
import eu.etaxonomy.cdm.model.common.Language;
27
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
28
import eu.etaxonomy.cdm.model.description.TaxonDescription;
29
import eu.etaxonomy.cdm.model.location.Country;
30
import eu.etaxonomy.cdm.model.name.IBotanicalName;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
32
import eu.etaxonomy.cdm.model.name.Rank;
33
import eu.etaxonomy.cdm.model.name.RankClass;
34
import eu.etaxonomy.cdm.model.name.TaxonName;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37
import eu.etaxonomy.cdm.model.taxon.Synonym;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.term.DefinedTerm;
41
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
42

    
43
/**
44
 * @author a.mueller
45
 * @since 25.11.2016
46
 *
47
 */
48
@Component
49
public class GermanSLTaxonImport
50
            extends GermanSLImporBase {
51

    
52
    private static final long serialVersionUID = 236093186271666895L;
53

    
54
    private static final Logger logger = Logger.getLogger(GermanSLTaxonImport.class);
55

    
56
    static final String SPECIES_NR = "SPECIES_NR";
57
    private static final String AUTHOR = "AUTHOR";
58
    private static final String ABBREVIAT = "ABBREVIAT";
59
    private static final String SEC = "SECUNDUM";
60
    private static final String RANG = "RANG";
61
    private static final String EXTERNAL_ID = "external_ID";
62
    private static final String GRUPPE = "GRUPPE";
63
    static final String VALID_NR = "VALID_NR";
64
    static final String SYNONYM = "SYNONYM";
65
    private static final String NATIVENAME = "NATIVENAME";
66
    private static final String LETTER_CODE = "LETTERCODE";
67
    static final String AGG = "AGG";
68

    
69
    private static final String AGG_NAME = "AGG_NAME";
70
    private static final String VALID_NAME = "VALID_NAME";
71

    
72
    private static final String NACHWEIS = "NACHWEIS";
73
    private static final String HYBRID = "HYBRID";
74
    private static final String BEGRUEND = "BEGRUEND";
75
    private static final String EDITSTATUS = "EDITSTATUS";
76

    
77
    private static final String UUID_ = "UUID";
78

    
79

    
80
    public static final String TAXON_NAMESPACE = "1.3.4";
81

    
82
    @SuppressWarnings("unchecked")
83
    protected ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper
84
           = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewStandaloneInstance();
85

    
86

    
87
    @Override
88
    protected String getWorksheetName(GermanSLImportConfigurator config) {
89
        return "1.3.4";
90
    }
91

    
92
    //dirty I know, but who cares, needed by distribution and common name import
93
    protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
94

    
95

    
96
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
97
            SPECIES_NR,EXTERNAL_ID,ABBREVIAT,
98
            AUTHOR,SEC,SYNONYM,
99
            LETTER_CODE, AGG,
100
            NATIVENAME,VALID_NR,RANG,GRUPPE,
101
            UUID_,
102
            NACHWEIS, HYBRID, BEGRUEND, EDITSTATUS, AGG_NAME, VALID_NAME
103
        });
104

    
105

    
106
    @Override
107
    protected void firstPass(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
108
        String line = state.getCurrentLine() + ": ";
109
        Map<String, String> record = state.getOriginalRecord();
110

    
111
        Set<String> keys = record.keySet();
112

    
113
        checkAllKeysExist(line, keys, expectedKeys);
114

    
115
        //Name
116
        NameResult nameResult = makeName(line, record, state);
117
        IBotanicalName taxonName = nameResult.name;
118

    
119
        //sec
120
        String secRefStr = getValue(record, SEC);
121
        Reference sec = getSecRef(state, secRefStr, line);
122

    
123
        //status
124
        String statusStr = getValue(record, SYNONYM);
125
        TaxonBase<?> taxonBase;
126
        if (isAccepted(statusStr, nameResult)){
127
            taxonBase = Taxon.NewInstance(taxonName, sec);
128
//            if (nameResult.proParte){
129
//                logger.warn(line + "accepted taxon can not be pro parte in GermanSL");
130
//            }
131
        }else{
132
            Synonym syn = Synonym.NewInstance(taxonName, sec);
133
//            if (nameResult.proParte){
134
//                syn.setProParte(true);
135
//            }
136
            taxonBase = syn;
137
        }
138
        if (!isBlank(nameResult.sensu)){
139
            taxonBase.setAppendedPhrase(nameResult.sensu);
140
        }
141
        //TODO right order?
142
        taxonBase.setAppendedPhrase(CdmUtils.concat(" ", nameResult.auct, taxonBase.getAppendedPhrase()));
143

    
144
        //lettercode
145
        String lettercode = getValue(record, LETTER_CODE);
146
        if (isNotBlank(lettercode)){
147
            UUID idTypeUUID;
148
            try {
149
                idTypeUUID = state.getTransformer().getIdentifierTypeUuid("LETTERCODE");
150
                DefinedTerm idType = getIdentiferType(state, idTypeUUID, "GermanSL lettercode", "GermanSL lettercode", "LETTERCODE", null);
151
                taxonBase.addIdentifier(lettercode, idType);
152
            } catch (UndefinedTransformerMethodException e) {
153
               e.printStackTrace();
154
            }
155
        }
156

    
157
//        //annotation
158
//        String annotation = getValue(record, "Anotacion al Taxon");
159
//        if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
160
//            taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
161
//        }
162

    
163
        //UUID
164
        String uuid = getValue(record, UUID_);
165
        //TOOD why sometimes null?
166
        if (uuid != null){
167
            taxonBase.setUuid(UUID.fromString(uuid));
168
        }
169

    
170

    
171
        //NATIVE NAME
172
        String commonNameStr = getValue(record, NATIVENAME);
173
        //Ann.: synonym common names should be removed!
174
        if (isNotBlank(commonNameStr)){
175
            makeCommonName(commonNameStr, taxonBase, line);
176
        }
177

    
178

    
179
        //id
180
        String id = getValue(record, SPECIES_NR);
181
        this.addOriginalSource(taxonBase, id, TAXON_NAMESPACE, getSourceReference(state));
182

    
183
        //save
184
        getTaxonService().saveOrUpdate(taxonBase);
185
        saveNameRelations(taxonBase.getName());
186
        taxonIdMap.put(id, taxonBase);
187
    }
188

    
189

    
190

    
191
    private String removeProparte(String authorStr) {
192
        String regEx = "\\s+p\\.\\s*p\\.$";
193
        if (authorStr == null || !authorStr.matches(".*" + regEx)){
194
            return authorStr;
195
        }else{
196
            return authorStr.replaceAll(regEx, "");
197
        }
198
    }
199

    
200
    private String removeSensuLatoStricto(String authorStr) {
201
        String regEx = "\\s+s\\.\\s*(l|str)\\.$";
202

    
203
        if (authorStr == null || !authorStr.matches(".*" + regEx)){
204
            return authorStr;
205
        }else{
206
            return authorStr.replaceAll(regEx, "");
207
        }
208
    }
209

    
210
    private String removeAuct(String authorStr) {
211
        String regEx = "auct\\.\\??$";
212

    
213
        if (authorStr == null || !authorStr.matches(/*".*" + */regEx)){
214
            return authorStr;
215
        }else{
216
            return ""; //authorStr.replaceAll(regEx, "");
217
        }
218
    }
219

    
220

    
221
    /**
222
     * @param state
223
     * @param secRefStr
224
     * @return
225
     */
226
    private Reference getSecRef(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String secRefStr, String line) {
227
        Reference result = state.getReference(secRefStr);
228
        if (result == null && secRefStr != null){
229
            result = ReferenceFactory.newGeneric();
230
            result.setTitleCache(secRefStr, true);
231
            state.putReference(secRefStr, result);
232
        }
233

    
234
        return result;
235
    }
236

    
237

    
238

    
239
    /**
240
     * @param record
241
     * @param state
242
     * @return
243
     */
244
    public NameResult makeName(String line, Map<String, String> record, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
245

    
246
        String specieNrStr = getValue(record, SPECIES_NR);
247
        String nameStr = getValue(record, ABBREVIAT);
248
        String authorStr = getValue(record, AUTHOR);
249
        String rankStr = getValue(record, RANG);
250

    
251
        NameResult result = new NameResult();
252

    
253
        //rank
254
        Rank rank = makeRank(line, state, rankStr);
255

    
256
        //name
257
        nameStr = normalizeNameStr(nameStr);
258
        String nameStrWithoutSensu = removeSensuLatoStricto(nameStr);
259
        if (nameStrWithoutSensu.length() < nameStr.length()){
260
            result.sensu = nameStr.substring(nameStrWithoutSensu.length()).trim();
261
            nameStr = nameStrWithoutSensu;
262
        }
263

    
264
        //author
265
        //pp
266
        authorStr = normalizeAuthorStr(authorStr);
267
        String authorStrWithoutProParte = removeProparte(authorStr);
268
        result.proParte = authorStrWithoutProParte.length() < authorStr.length();
269
        authorStr = authorStrWithoutProParte;
270

    
271
        //auct.
272
        String authorStrWithoutAuct = removeAuct(authorStr);
273
        if (authorStrWithoutAuct.length() < authorStr.length()){
274
            result.auct = authorStr.substring(authorStrWithoutAuct.length()).trim();
275
        }
276
        authorStr = authorStrWithoutAuct;
277

    
278

    
279
        //name+author
280
        String fullNameStr = CdmUtils.concat(" ", nameStr, authorStr);
281

    
282
        IBotanicalName fullName = (IBotanicalName)nameParser.parseReferencedName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
283
        if (fullName.isProtectedTitleCache()){
284
            logger.warn(line + "Name could not be parsed: " + fullNameStr );
285
        }else{
286
            getDeduplicationHelper(state).replaceAuthorNamesAndNomRef(state, fullName);
287
//            replaceAuthorNamesAndNomRef(state, fullName);
288
        }
289
//        BotanicalName existingName = getExistingName(state, fullName);
290

    
291
        //TODO handle existing name
292
        IBotanicalName name = fullName;
293
        this.addOriginalSource(name, specieNrStr, TAXON_NAMESPACE + "_Name", getSourceReference(state));
294

    
295
        result.name = name;
296
        return result;
297
    }
298

    
299
    /**
300
     * @param state
301
     * @return
302
     */
303
    protected ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> getDeduplicationHelper(SimpleExcelTaxonImportState<?> state) {
304
        if (deduplicationHelper == null){
305
            deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
306
        }
307
        return deduplicationHelper;
308
    }
309

    
310

    
311

    
312
    /**
313
     * @param line
314
     * @param state
315
     * @param rankStr
316
     * @return
317
     */
318
    private Rank makeRank(String line, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String rankStr) {
319
        Rank rank = null;
320
        try {
321
            rank = state.getTransformer().getRankByKey(rankStr);
322
            if (rank == null){
323
                UUID rankUuid = state.getTransformer().getRankUuid(rankStr);
324
                OrderedTermVocabulary<Rank> voc = (OrderedTermVocabulary<Rank>)Rank.SPECIES().getVocabulary();
325
                //TODO
326
                Rank lowerRank = Rank.FORM();
327
                rank = getRank(state, rankUuid, rankStr, rankStr, rankStr, voc, lowerRank, RankClass.Infraspecific);
328
                if (rank == null){
329
                    logger.warn(line + "Rank not recognized: " + rankStr);
330
                }
331
            }
332
        } catch (Exception e1) {
333
                logger.warn(line + "Exception when trying to define rank '" + rankStr + "': " + e1.getMessage());
334
                e1.printStackTrace();
335
        }
336
        return rank;
337
    }
338

    
339

    
340
    /**
341
     * @param authorStr
342
     * @return
343
     */
344
    private String normalizeAuthorStr(String authorStr) {
345
        if (isBlank(authorStr)){
346
            return "";
347
        }else{
348
            if (authorStr.equals("-") || authorStr.equals("#")){
349
                authorStr = "";
350
            }
351
            return authorStr;
352
        }
353
    }
354

    
355
    private String normalizeNameStr(String nameStr) {
356
        nameStr = nameStr
357
                .replace(" agg.", " aggr.")
358
                .replace(" fo. ", " f. ")
359
             ;
360
        return nameStr;
361
    }
362

    
363

    
364
    boolean nameMapIsInitialized = false;
365
    /**
366
     * @param state
367
     * @param fullName
368
     * @return
369
     */
370
    private IBotanicalName getExistingName(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, IBotanicalName fullName) {
371
        initExistinNames(state);
372
        return (IBotanicalName)state.getName(fullName.getTitleCache());
373
    }
374

    
375
    /**
376
     * @param state
377
     */
378
    @SuppressWarnings("rawtypes")
379
    private void initExistinNames(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
380
        if (!nameMapIsInitialized){
381
            List<String> propertyPaths = Arrays.asList("");
382
            List<TaxonName> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
383
            for (TaxonName tnb : existingNames){
384
                state.putName(tnb.getTitleCache(), tnb);
385
            }
386
            nameMapIsInitialized = true;
387
        }
388
    }
389

    
390

    
391
    /**
392
     * @param commmonNameStr
393
     * @param taxonBase
394
     */
395
    private void makeCommonName(String commmonNameStr, TaxonBase<?> taxonBase, String line) {
396
        if (taxonBase.isInstanceOf(Synonym.class)){
397
            //synonym common names should be neglected
398
            return;
399
        }
400
        Taxon acceptedTaxon = getAccepted(taxonBase);
401
        if (acceptedTaxon != null){
402
            TaxonDescription desc = getTaxonDescription(acceptedTaxon, false, true);
403
            desc.setDefault(true);
404
            CommonTaxonName commonName = CommonTaxonName.NewInstance(commmonNameStr, Language.GERMAN(), Country.GERMANY());
405
            desc.addElement(commonName);
406
        }else{
407
            logger.warn(line + "No accepted taxon available");
408
        }
409

    
410
    }
411

    
412

    
413
    /**
414
     * @param next
415
     * @return
416
     */
417
    private Taxon getAccepted(TaxonBase<?> taxonBase) {
418
        if (taxonBase.isInstanceOf(Taxon.class)){
419
            return CdmBase.deproxy(taxonBase, Taxon.class);
420
        }else{
421
            Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
422
            return syn.getAcceptedTaxon();
423
        }
424
    }
425

    
426

    
427
    @Override
428
    protected boolean isIgnore(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
429
        return ! state.getConfig().isDoTaxa();
430
    }
431
}
(3-3/5)