Project

General

Profile

Download (15.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.redlist.germanSL;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17

    
18
import org.apache.log4j.Logger;
19
import org.springframework.stereotype.Component;
20

    
21
import eu.etaxonomy.cdm.common.CdmUtils;
22
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
23
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
24
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
25
import eu.etaxonomy.cdm.model.common.CdmBase;
26
import eu.etaxonomy.cdm.model.common.DefinedTerm;
27
import eu.etaxonomy.cdm.model.common.Language;
28
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
29
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
30
import eu.etaxonomy.cdm.model.description.TaxonDescription;
31
import eu.etaxonomy.cdm.model.location.Country;
32
import eu.etaxonomy.cdm.model.name.BotanicalName;
33
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
34
import eu.etaxonomy.cdm.model.name.Rank;
35
import eu.etaxonomy.cdm.model.name.RankClass;
36
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37
import eu.etaxonomy.cdm.model.reference.Reference;
38
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
39
import eu.etaxonomy.cdm.model.taxon.Synonym;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42

    
43
/**
44
 * @author a.mueller
45
 * @date 25.11.2016
46
 *
47
 */
48
@Component
49
public class GermanSLTaxonImport<CONFIG extends GermanSLImportConfigurator>
50
            extends SimpleExcelTaxonImport<CONFIG> {
51

    
52
    private static final long serialVersionUID = 236093186271666895L;
53

    
54
    private static final Logger logger = Logger.getLogger(GermanSLTaxonImport.class);
55

    
56
    static final String SPECIES_NR = "SPECIES_NR";
57
    private static final String AUTHOR = "AUTHOR";
58
    private static final String ABBREVIAT = "ABBREVIAT";
59
    private static final String SEC = "SECUNDUM";
60
    private static final String RANG = "RANG";
61
    private static final String EXTERNAL_ID = "external_ID";
62
    private static final String GRUPPE = "GRUPPE";
63
    static final String VALID_NR = "VALID_NR";
64
    static final String SYNONYM = "SYNONYM";
65
    private static final String NATIVENAME = "NATIVENAME";
66
    private static final String LETTER_CODE = "LETTERCODE";
67
    static final String AGG = "AGG";
68

    
69
    private static final String AGG_NAME = "AGG_NAME";
70
    private static final String VALID_NAME = "VALID_NAME";
71

    
72
    private static final String NACHWEIS = "NACHWEIS";
73
    private static final String HYBRID = "HYBRID";
74
    private static final String BEGRUEND = "BEGRUEND";
75
    private static final String EDITSTATUS = "EDITSTATUS";
76

    
77
    private static final String UUID_ = "UUID";
78

    
79

    
80
    public static final String TAXON_NAMESPACE = "1.3.4";
81

    
82
    @Override
83
    protected String getWorksheetName() {
84
        return "1.3.4";
85
    }
86

    
87
    //dirty I know, but who cares, needed by distribution and common name import
88
    protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
89

    
90

    
91
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
92
            SPECIES_NR,EXTERNAL_ID,ABBREVIAT,
93
            AUTHOR,SEC,SYNONYM,
94
            LETTER_CODE, AGG,
95
            NATIVENAME,VALID_NR,RANG,GRUPPE,
96
            UUID_,
97
            NACHWEIS, HYBRID, BEGRUEND, EDITSTATUS, AGG_NAME, VALID_NAME
98
        });
99

    
100

    
101
    @Override
102
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
103
        String line = state.getCurrentLine() + ": ";
104
        HashMap<String, String> record = state.getOriginalRecord();
105

    
106
        Set<String> keys = record.keySet();
107

    
108
        checkAllKeysExist(line, keys, expectedKeys);
109

    
110
        //Name
111
        NameResult nameResult = makeName(line, record, state);
112
        BotanicalName taxonName = nameResult.name;
113

    
114
      //sec
115
        String secRefStr = getValue(record, SEC);
116
        Reference sec = getSecRef(state, secRefStr, line);
117

    
118

    
119
        //status
120
        String statusStr = getValue(record, SYNONYM);
121
        TaxonBase<?> taxonBase;
122
        if (isAccepted(statusStr)){
123
            taxonBase = Taxon.NewInstance(taxonName, sec);
124
            if (nameResult.proParte){
125
                logger.warn(line + "accepted taxon can not be pro parte");
126
            }
127
        }else{
128
            Synonym syn = Synonym.NewInstance(taxonName, sec);
129
            if (nameResult.proParte){
130
                syn.setProParte(true);
131
            }
132
            taxonBase = syn;
133
        }
134
        if (!isBlank(nameResult.sensu)){
135
            taxonBase.setAppendedPhrase(nameResult.sensu);
136
        }
137
        //TODO right order?
138
        taxonBase.setAppendedPhrase(CdmUtils.concat(" ", nameResult.auct, taxonBase.getAppendedPhrase()));
139

    
140
        //lettercode
141
        String lettercode = getValue(record, LETTER_CODE);
142
        if (isNotBlank(lettercode)){
143
            UUID idTypeUUID;
144
            try {
145
                idTypeUUID = state.getTransformer().getIdentifierTypeUuid("LETTERCODE");
146
                DefinedTerm idType = getIdentiferType(state, idTypeUUID, "GermanSL lettercode", "GermanSL lettercode", "LETTERCODE", null);
147
                taxonBase.addIdentifier(lettercode, idType);
148
            } catch (UndefinedTransformerMethodException e) {
149
               e.printStackTrace();
150
            }
151
        }
152

    
153
//        //annotation
154
//        String annotation = getValue(record, "Anotacion al Taxon");
155
//        if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
156
//            taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
157
//        }
158

    
159
        //UUID
160
        String uuid = getValue(record, UUID_);
161
        //TOOD why sometimes null?
162
        if (uuid != null){
163
            taxonBase.setUuid(UUID.fromString(uuid));
164
        }
165

    
166

    
167
        //NATIVE NAME
168
        String commonNameStr = getValue(record, NATIVENAME);
169
        //Ann.: synonym common names should be removed!
170
        if (isNotBlank(commonNameStr)){
171
            makeCommonName(commonNameStr, taxonBase, line);
172
        }
173

    
174

    
175
        //id
176
        String id = getValue(record, SPECIES_NR);
177
        this.addOriginalSource(taxonBase, id, TAXON_NAMESPACE, state.getConfig().getSourceReference());
178

    
179
        //save
180
//        getTaxonService().save(taxonBase);
181
        taxonIdMap.put(id, taxonBase);
182
    }
183

    
184

    
185
    private String removeProparte(String authorStr) {
186
        String regEx = "\\s+p\\.\\s*p\\.$";
187
        if (authorStr == null || !authorStr.matches(".*" + regEx)){
188
            return authorStr;
189
        }else{
190
            return authorStr.replaceAll(regEx, "");
191
        }
192
    }
193

    
194
    private String removeSensuLatoStricto(String authorStr) {
195
        String regEx = "\\s+s\\.\\s*(l|str)\\.$";
196

    
197
        if (authorStr == null || !authorStr.matches(".*" + regEx)){
198
            return authorStr;
199
        }else{
200
            return authorStr.replaceAll(regEx, "");
201
        }
202
    }
203

    
204
    private String removeAuct(String authorStr) {
205
        String regEx = "auct\\.\\??$";
206

    
207
        if (authorStr == null || !authorStr.matches(/*".*" + */regEx)){
208
            return authorStr;
209
        }else{
210
            return ""; //authorStr.replaceAll(regEx, "");
211
        }
212
    }
213

    
214

    
215
    /**
216
     * @param state
217
     * @param secRefStr
218
     * @return
219
     */
220
    private Reference getSecRef(SimpleExcelTaxonImportState<CONFIG> state, String secRefStr, String line) {
221
        Reference result = state.getReference(secRefStr);
222
        if (result == null && secRefStr != null){
223
            result = ReferenceFactory.newGeneric();
224
            result.setTitleCache(secRefStr, true);
225

    
226
//            TimePeriod tp = TimePeriodParser.parseString(secRefStr.substring(secRefStr.length()-4));
227
//            String authorStrPart = secRefStr.substring(0, secRefStr.length()-6);
228
//            if (! (authorStrPart + ", " + tp.getYear()).equals(secRefStr)){
229
//                logger.warn(line + "Sec ref could not be parsed: " + secRefStr);
230
//            }else{
231
//                result.setDatePublished(tp);
232
//            }
233
//            TeamOrPersonBase<?> author = state.getAgentBase(authorStrPart);
234
//            if (author == null){
235
//                if (authorStrPart.contains("&")){
236
//                    Team team = Team.NewInstance();
237
//                    String[] authorSplit = authorStrPart.split("&");
238
//                    String[] firstAuthorSplit = authorSplit[0].trim().split(",");
239
//                    for (String authorStr : firstAuthorSplit){
240
//                        addTeamMember(team, authorStr);
241
//                    }
242
//                    addTeamMember(team, authorSplit[1]);
243
//                    result.setAuthorship(team);
244
//                    state.putAgentBase(team.getTitleCache(), team);
245
//                }else if (authorStrPart.equalsIgnoreCase("Tropicos") || authorStrPart.equalsIgnoreCase("The Plant List")
246
//                        || authorStrPart.equalsIgnoreCase("APG IV")){
247
//                    result.setTitle(authorStrPart);
248
//                }else{
249
//                    Person person = Person.NewInstance();
250
//                    person.setLastname(authorStrPart);
251
//                    result.setAuthorship(person);
252
//                    state.putAgentBase(person.getTitleCache(), person);
253
//                }
254
//            }else{
255
//                result.setAuthorship(author);
256
//            }
257
            state.putReference(secRefStr, result);
258
        }
259

    
260
        return result;
261
    }
262

    
263

    
264
    private class NameResult{
265
        BotanicalName name;
266
        boolean proParte = false;
267
        String sensu = null;
268
        String auct = null;
269
    }
270

    
271
    /**
272
     * @param record
273
     * @param state
274
     * @return
275
     */
276
    private NameResult makeName(String line, HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
277

    
278
        String specieNrStr = getValue(record, SPECIES_NR);
279
        String nameStr = getValue(record, ABBREVIAT);
280
        String authorStr = getValue(record, AUTHOR);
281
        String rankStr = getValue(record, RANG);
282

    
283
        NameResult result = new NameResult();
284

    
285
        //rank
286
        Rank rank = makeRank(line, state, rankStr);
287

    
288
        //name
289
        nameStr = normalizeNameStr(nameStr);
290
        String nameStrWithoutSensu = removeSensuLatoStricto(nameStr);
291
        if (nameStrWithoutSensu.length() < nameStr.length()){
292
            result.sensu = nameStr.substring(nameStrWithoutSensu.length()).trim();
293
            nameStr = nameStrWithoutSensu;
294
        }
295

    
296
        //author
297
        //pp
298
        authorStr = normalizeAuthorStr(authorStr);
299
        String authorStrWithoutProParte = removeProparte(authorStr);
300
        result.proParte = authorStrWithoutProParte.length() < authorStr.length();
301
        authorStr = authorStrWithoutProParte;
302

    
303
        //auct.
304
        String authorStrWithoutAuct = removeAuct(authorStr);
305
        if (authorStrWithoutAuct.length() < authorStr.length()){
306
            result.auct = authorStr.substring(authorStrWithoutAuct.length()).trim();
307
        }
308
        authorStr = authorStrWithoutAuct;
309

    
310

    
311
        //name+author
312
        String fullNameStr = CdmUtils.concat(" ", nameStr, authorStr);
313

    
314
        BotanicalName fullName = (BotanicalName)nameParser.parseReferencedName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
315
        if (fullName.isProtectedTitleCache()){
316
            logger.warn(line + "Name could not be parsed: " + fullNameStr );
317
        }else{
318
            replaceAuthorNamesAndNomRef(state, fullName);
319
        }
320
//        BotanicalName existingName = getExistingName(state, fullName);
321

    
322
        //TODO handle existing name
323
        BotanicalName name = fullName;
324
        this.addOriginalSource(name, specieNrStr, TAXON_NAMESPACE + "_Name", state.getConfig().getSourceReference());
325

    
326
        result.name = name;
327
        return result;
328
    }
329

    
330

    
331

    
332
    /**
333
     * @param line
334
     * @param state
335
     * @param rankStr
336
     * @return
337
     */
338
    private Rank makeRank(String line, SimpleExcelTaxonImportState<CONFIG> state, String rankStr) {
339
        Rank rank = null;
340
        try {
341
            rank = state.getTransformer().getRankByKey(rankStr);
342
            if (rank == null){
343
                UUID rankUuid = state.getTransformer().getRankUuid(rankStr);
344
                OrderedTermVocabulary<Rank> voc = (OrderedTermVocabulary<Rank>)Rank.SPECIES().getVocabulary();
345
                //TODO
346
                Rank lowerRank = Rank.FORM();
347
                rank = getRank(state, rankUuid, rankStr, rankStr, rankStr, voc, lowerRank, RankClass.Infraspecific);
348
                if (rank == null){
349
                    logger.warn(line + "Rank not recognized: " + rankStr);
350
                }
351
            }
352
        } catch (Exception e1) {
353
                logger.warn(line + "Rank not recognized: " + rankStr);
354
        }
355
        return rank;
356
    }
357

    
358

    
359
    /**
360
     * @param authorStr
361
     * @return
362
     */
363
    private String normalizeAuthorStr(String authorStr) {
364
        if (isBlank(authorStr)){
365
            return "";
366
        }else{
367
            if (authorStr.equals("-") || authorStr.equals("#")){
368
                authorStr = "";
369
            }
370
            return authorStr;
371
        }
372
    }
373

    
374
    private String normalizeNameStr(String nameStr) {
375
        nameStr = nameStr
376
                .replace(" agg.", " aggr.")
377
                .replace(" fo. ", " f. ")
378
             ;
379
        return nameStr;
380
    }
381

    
382

    
383
    boolean nameMapIsInitialized = false;
384
    /**
385
     * @param state
386
     * @param fullName
387
     * @return
388
     */
389
    private BotanicalName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, BotanicalName fullName) {
390
        initExistinNames(state);
391
        return (BotanicalName)state.getName(fullName.getTitleCache());
392
    }
393

    
394
    /**
395
     * @param state
396
     */
397
    @SuppressWarnings("rawtypes")
398
    private void initExistinNames(SimpleExcelTaxonImportState<CONFIG> state) {
399
        if (!nameMapIsInitialized){
400
            List<String> propertyPaths = Arrays.asList("");
401
            List<TaxonNameBase> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
402
            for (TaxonNameBase tnb : existingNames){
403
                state.putName(tnb.getTitleCache(), tnb);
404
            }
405
            nameMapIsInitialized = true;
406
        }
407
    }
408

    
409

    
410
    /**
411
     * @param commmonNameStr
412
     * @param taxonBase
413
     */
414
    private void makeCommonName(String commmonNameStr, TaxonBase<?> taxonBase, String line) {
415
        if (taxonBase.isInstanceOf(Synonym.class)){
416
            //synonym common names should be neglected
417
            return;
418
        }
419
        Taxon acceptedTaxon = getAccepted(taxonBase);
420
        if (acceptedTaxon != null){
421
            TaxonDescription desc = getTaxonDescription(acceptedTaxon, false, true);
422
            desc.setDefault(true);
423
            CommonTaxonName commonName = CommonTaxonName.NewInstance(commmonNameStr, Language.GERMAN(), Country.GERMANY());
424
            desc.addElement(commonName);
425
        }else{
426
            logger.warn(line + "No accepted taxon available");
427
        }
428

    
429
    }
430

    
431

    
432

    
433
    private boolean isAccepted(String statusStr){
434
        if ("FALSE()".equals(statusStr) || "0".equals(statusStr) || "false".equalsIgnoreCase(statusStr)){
435
            return true;
436
        } else if ("TRUE()".equals(statusStr) || "1".equals(statusStr)|| "true".equalsIgnoreCase(statusStr)){
437
            return false;
438
        }else{
439
            logger.warn("Unhandled taxon status: " + statusStr);
440
            return false;
441
        }
442
    }
443

    
444

    
445
    /**
446
     * @param next
447
     * @return
448
     */
449
    private Taxon getAccepted(TaxonBase<?> taxonBase) {
450
        if (taxonBase.isInstanceOf(Taxon.class)){
451
            return CdmBase.deproxy(taxonBase, Taxon.class);
452
        }else{
453
            Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
454
            return syn.getAcceptedTaxon();
455
        }
456
    }
457

    
458

    
459
    @Override
460
    protected boolean isIgnore(SimpleExcelTaxonImportState<CONFIG> state) {
461
        return ! state.getConfig().isDoTaxa();
462
    }
463
}
(2-2/4)