Project

General

Profile

Download (20 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.mexico;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.Iterator;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17
import java.util.UUID;
18

    
19
import org.apache.commons.lang3.StringUtils;
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.Annotation;
28
import eu.etaxonomy.cdm.model.common.AnnotationType;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.common.Extension;
31
import eu.etaxonomy.cdm.model.common.ExtensionType;
32
import eu.etaxonomy.cdm.model.common.Language;
33
import eu.etaxonomy.cdm.model.common.TimePeriod;
34
import eu.etaxonomy.cdm.model.name.BotanicalName;
35
import eu.etaxonomy.cdm.model.name.IBotanicalName;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
38
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
39
import eu.etaxonomy.cdm.model.name.Rank;
40
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
42
import eu.etaxonomy.cdm.model.reference.Reference;
43
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
44
import eu.etaxonomy.cdm.model.reference.ReferenceType;
45
import eu.etaxonomy.cdm.model.taxon.Classification;
46
import eu.etaxonomy.cdm.model.taxon.Synonym;
47
import eu.etaxonomy.cdm.model.taxon.SynonymType;
48
import eu.etaxonomy.cdm.model.taxon.Taxon;
49
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
51
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
52
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
53
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
54

    
55
/**
56
 * @author a.mueller
57
 * @date 16.06.2016
58
 *
59
 */
60
@Component
61
public class MexicoConabioTaxonImport<CONFIG extends MexicoConabioImportConfigurator>
62
        extends SimpleExcelTaxonImport<CONFIG>{
63

    
64
    private static final long serialVersionUID = 3691221053127007258L;
65

    
66
    private static final Logger logger = Logger.getLogger(MexicoConabioTaxonImport.class);
67

    
68
    public static final String TAXON_NAMESPACE = "Taxonomia";
69

    
70
    @Override
71
    protected String getWorksheetName() {
72
        return "Taxonomia";
73
    }
74

    
75
    //dirty I know, but who cares, needed by distribution and common name import
76
    protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
77

    
78
    private Classification classification;
79

    
80

    
81
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
82
            "IdCAT","IdCATRel","IdCAT_AscendenteInmediato"
83
            ,"IdCAT_AscendenteObligatorio","CategoriaTaxonomica","Nombre",
84
            "EstatusNombre","AutorNombre","AutorSinAnio","Anio",
85
            "ReferenciaNombre",
86
            "Division","AutorDivision","ReferenciaClasificacionDivision",
87
            "Clase","AutorClase","ReferenciaClasificacionClase",
88
            "Subclase","AutorSubclase","ReferenciaClasificacionSubclase",
89
            "Superorden","AutorSuperorden","ReferenciaClasificacionSuperorden",
90
            "Orden","AutorOrden","ReferenciaClasificacionOrden",
91
            "Familia",     "EstatusFamilia","AutorFamilia","ReferenciaClasificacionFamilia",
92
            "Tribu",  "EstatusTribu","AutorTribu","ReferenciaNombreTribu",
93
            "Genero","EstatusGenero","AutorGenero","","ReferenciaNombreGenero",
94
            "Epiteto_especifico","EstatusEspecie","AutorEpiteto_especifico","ReferenciaNombreEspecie",
95
            "CategoriaInfraespecifica","NombreInfraespecifico","EstatusInfraespecie","AutorInfraespecie","ReferenciaNombreInfraespecifico",
96
            "CitaNomenclatural","Anotacion al Taxon","Fuente_BDs",
97
            "FamAceptada","GenAceptado","CategoriaTaxAceptada","NombreAceptado","AutorNombreAceptado","AutorSinAnioAceptado","AnioAceptado",
98
            "TipoRelacion","ReferenciaSinonimia","ComentariosRevisor",
99
            "CompareID","IdCAT_OLD","Nombre_OLD","AutorSinAnio_OLD",
100
            "CitaNomenclatural_OLD","ReferenceType","IsUpdated"
101
        });
102

    
103

    
104
    @Override
105
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
106
        String line = state.getCurrentLine() + ": ";
107
        HashMap<String, String> record = state.getOriginalRecord();
108

    
109
        Set<String> keys = record.keySet();
110

    
111
        checkAllKeysExist(line, keys, expectedKeys);
112

    
113
        if (getValue(record, "Nombre") == null ){
114
            logger.warn("No FullnameNoAuthors given: " + line);
115
            return;
116
        }
117

    
118
        //Name
119
        BotanicalName speciesName = makeName(line, record, state);
120

    
121
        //sec
122
        String secRefStr = getValueNd(record, "ReferenciaNombre");
123
        Reference sec = getSecRef(state, secRefStr, line);
124

    
125
        //status
126
        String statusStr = getValue(record, "EstatusNombre");
127
        TaxonBase<?> taxonBase;
128
        if ("aceptado".equals(statusStr)){
129
            taxonBase = Taxon.NewInstance(speciesName, sec);
130
        }else if (statusStr.startsWith("sin")){
131
            taxonBase = Synonym.NewInstance(speciesName, sec);
132
        }else{
133
            throw new RuntimeException(line + " Status not recognized: " + statusStr);
134
        }
135

    
136
        //annotation
137
        String annotation = getValue(record, "Anotacion al Taxon");
138
        if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
139
            taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
140
        }
141

    
142
        //id
143
        String idCat = getValue(record, "IdCAT");
144
        this.addOriginalSource(taxonBase, idCat, TAXON_NAMESPACE, state.getConfig().getSourceReference());
145

    
146
        //save
147
        getTaxonService().save(taxonBase);
148
        taxonIdMap.put(idCat, taxonBase);
149

    
150
    }
151

    
152

    
153

    
154
    /**
155
     * @param state
156
     * @param secRefStr
157
     * @return
158
     */
159
    private Reference getSecRef(SimpleExcelTaxonImportState<CONFIG> state, String secRefStr, String line) {
160
        Reference result = state.getReference(secRefStr);
161
        if (result == null && secRefStr != null){
162
            result = ReferenceFactory.newBook();
163
            TimePeriod tp = TimePeriodParser.parseString(secRefStr.substring(secRefStr.length()-4));
164
            String authorStrPart = secRefStr.substring(0, secRefStr.length()-6);
165
            if (! (authorStrPart + ", " + tp.getYear()).equals(secRefStr)){
166
                logger.warn(line + "Sec ref could not be parsed: " + secRefStr);
167
            }else{
168
                result.setDatePublished(tp);
169
            }
170
            TeamOrPersonBase<?> author = state.getAgentBase(authorStrPart);
171
            if (author == null){
172
                if (authorStrPart.contains("&")){
173
                    Team team = Team.NewInstance();
174
                    String[] authorSplit = authorStrPart.split("&");
175
                    String[] firstAuthorSplit = authorSplit[0].trim().split(",");
176
                    for (String authorStr : firstAuthorSplit){
177
                        addTeamMember(team, authorStr);
178
                    }
179
                    addTeamMember(team, authorSplit[1]);
180
                    result.setAuthorship(team);
181
                    state.putAgentBase(team.getTitleCache(), team);
182
                }else if (authorStrPart.equalsIgnoreCase("Tropicos") || authorStrPart.equalsIgnoreCase("The Plant List")
183
                        || authorStrPart.equalsIgnoreCase("APG IV")){
184
                    result.setTitle(authorStrPart);
185
                }else{
186
                    Person person = Person.NewInstance();
187
                    person.setLastname(authorStrPart);
188
                    result.setAuthorship(person);
189
                    state.putAgentBase(person.getTitleCache(), person);
190
                }
191
            }else{
192
                result.setAuthorship(author);
193
            }
194
            state.putReference(secRefStr, result);
195
        }else if(secRefStr == null){
196
            return state.getConfig().getSecReference();
197
        }
198

    
199
        return result;
200
    }
201

    
202

    
203

    
204
    /**
205
     * @param team
206
     * @param author
207
     */
208
    private void addTeamMember(Team team, String author) {
209
        if (StringUtils.isNotBlank(author)){
210
            Person person = Person.NewInstance();
211
            person.setLastname(author.trim());
212
            team.addTeamMember(person);
213
        }
214
    }
215

    
216

    
217

    
218
    /**
219
     * @param record
220
     * @param state
221
     * @return
222
     */
223
    private BotanicalName makeName(String line, HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
224

    
225
        String authorStr = getValueNd(record, "AutorSinAnio");
226
        String nameStr = getValue(record, "Nombre");
227
        String nomRefStr = getValue(record, "CitaNomenclatural");
228
        String refType = getValue(record, "ReferenceType");
229
        String idCat = getValue(record, "IdCAT");
230
        String rankStr = getValue(record, "CategoriaTaxonomica");
231
        String annotation = getValue(record, "Anotacion al Taxon");
232

    
233
        //rank
234
        Rank rank = null;
235
        try {
236
            rank = state.getTransformer().getRankByKey(rankStr);
237
            if (Rank.SUBSPECIES().equals(rank) || Rank.VARIETY().equals(rank)){
238
                int i = nameStr.lastIndexOf(" ");
239
                nameStr = nameStr.substring(0, i) + " " + rank.getAbbreviation() + nameStr.substring(i);
240
            }
241
        } catch (UndefinedTransformerMethodException e) {
242
            logger.warn(line + "Rank not recognized: " + rankStr);
243
        }
244

    
245
        //name + author
246
        String fullNameStr = nameStr + (authorStr != null ? " " + authorStr : "");
247

    
248
        BotanicalName fullName = (BotanicalName)nameParser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
249
        if (fullName.isProtectedTitleCache()){
250
            logger.warn(line + "Name could not be parsed: " + fullNameStr );
251
        }else{
252
            replaceAuthorNamesAndNomRef(state, fullName);
253
        }
254
        BotanicalName existingName = getExistingName(state, fullName);
255

    
256
        //reference
257
        String refNameStr = getRefNameStr(nomRefStr, refType, fullNameStr);
258

    
259
        BotanicalName referencedName = (BotanicalName)nameParser.parseReferencedName(refNameStr, NomenclaturalCode.ICNAFP, rank);
260
        if (referencedName.isProtectedFullTitleCache() || referencedName.isProtectedTitleCache()){
261
            logger.warn(line + "Referenced name could not be parsed: " + refNameStr );
262
        }else{
263
            addSourcesToReferences(referencedName, state);
264
            replaceAuthorNamesAndNomRef(state, referencedName);
265
        }
266
        adaptRefTypeForGeneric(referencedName, refType);
267

    
268
        //compare nom. ref. with Borhidi
269
        BotanicalName result= referencedName;
270
        Boolean equal = null;
271
        if (existingName != null){
272
            String existingRefTitle = existingName.getFullTitleCache();
273
            String conabioRefTitle = referencedName.getFullTitleCache();
274
            if (!existingRefTitle.equals(conabioRefTitle)){
275
                existingName.setNomenclaturalMicroReference(referencedName.getNomenclaturalMicroReference());
276
                existingName.setNomenclaturalReference(referencedName.getNomenclaturalReference());
277
                equal = false;
278
            }else{
279
                equal = true;
280
            }
281
            result = existingName;
282
        }
283
        addNomRefExtension(state, result, equal);
284

    
285
        //status
286
        if (annotation != null && (annotation.equals("nom. illeg.") || annotation.equals("nom. cons."))){
287
            try {
288
                NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(annotation, result);
289
                result.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
290
            } catch (UnknownCdmTypeException e) {
291
                logger.warn(line + "nomStatusType not recognized: " + annotation);
292
            }
293
        }
294

    
295
        this.addOriginalSource(result, idCat, TAXON_NAMESPACE + "_Name", state.getConfig().getSourceReference());
296

    
297
        return result;
298
    }
299

    
300

    
301

    
302
    /**
303
     * @param name
304
     * @param state
305
     */
306
    private void addSourcesToReferences(IBotanicalName name, SimpleExcelTaxonImportState<CONFIG> state) {
307
        Reference nomRef = (Reference)name.getNomenclaturalReference();
308
        if (nomRef != null){
309
            nomRef.addSource(makeOriginalSource(state));
310
            if (nomRef.getInReference() != null){
311
                nomRef.getInReference().addSource(makeOriginalSource(state));
312
            }
313
        }
314
    }
315

    
316

    
317

    
318
    /**
319
     * @param referencedName
320
     * @param refType
321
     */
322
    private void adaptRefTypeForGeneric(IBotanicalName referencedName, String refTypeStr) {
323
        INomenclaturalReference ref = referencedName.getNomenclaturalReference();
324
        if (ref == null){
325
            return;
326
        }
327
        ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
328
        if (ref.getType() != refType && refType == ReferenceType.Book){
329
            ref.setType(refType);
330
        }
331
    }
332

    
333

    
334
    private ReferenceType refTypeByRefTypeStr(String refType){
335
        if ("A".equals(refType)){  //Article
336
            return ReferenceType.Article;
337
        }else if ("B".equals(refType)){   //Book
338
            return ReferenceType.Book;
339
        }else if (refType == null){   //Book
340
            return null;
341
        }else{
342
            throw new IllegalArgumentException("RefType not supported " + refType);
343
        }
344
    }
345

    
346
    /**
347
     * @param nomRefStr
348
     * @param refType
349
     * @param fullNameStr
350
     * @return
351
     */
352
    private String getRefNameStr(String nomRefStr, String refTypeStr, String fullNameStr) {
353
        String refNameStr = fullNameStr;
354
        ReferenceType refType = refTypeByRefTypeStr(refTypeStr);
355
        if (refType == ReferenceType.Article){
356
            refNameStr = fullNameStr + " in " + nomRefStr;
357
        }else if (refType == ReferenceType.Book){
358
            refNameStr = fullNameStr + ", " + nomRefStr;
359
        }else if (refType == null && nomRefStr != null){
360
            logger.warn("RefType is null but nomRefStr exists");
361
        }
362
        return refNameStr;
363
    }
364

    
365
    /**
366
     * @param state
367
     * @param equal
368
     * @param referencedName
369
     */
370
    private void addNomRefExtension(SimpleExcelTaxonImportState<CONFIG> state, BotanicalName name, Boolean equal) {
371
        String equalStr = equal == null ? "" : equal == true ? "EQUAL\n" : "NOT EQUAL\n";
372
        name.setFullTitleCache(null, false);
373
        String newExtensionStr = name.getFullTitleCache() + " - CONABIO";
374
        UUID uuidNomRefExtension = MexicoConabioTransformer.uuidNomRefExtension;
375
        for (Extension extension : name.getExtensions()){
376
            if (extension.getType().getUuid().equals(uuidNomRefExtension)){
377
                extension.setValue(equalStr + extension.getValue() + "\n" + newExtensionStr);
378
                return;
379
            }
380
        }
381
        String label = "Nomenclatural reference in Sources";
382
        String abbrev = "Nom. ref. src.";
383
        ExtensionType extensionType = getExtensionType(state, uuidNomRefExtension, label, label, abbrev);
384
        Extension.NewInstance(name, newExtensionStr, extensionType);
385
    }
386

    
387
    boolean nameMapIsInitialized = false;
388
    /**
389
     * @param state
390
     * @param fullName
391
     * @return
392
     */
393
    private BotanicalName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, BotanicalName fullName) {
394
        initExistinNames(state);
395
        return (BotanicalName)state.getName(fullName.getTitleCache());
396
    }
397

    
398
    /**
399
     * @param state
400
     */
401
    @SuppressWarnings("rawtypes")
402
    private void initExistinNames(SimpleExcelTaxonImportState<CONFIG> state) {
403
        if (!nameMapIsInitialized){
404
            List<String> propertyPaths = Arrays.asList("");
405
            List<TaxonNameBase> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
406
            for (TaxonNameBase tnb : existingNames){
407
                state.putName(tnb.getTitleCache(), tnb);
408
            }
409
            nameMapIsInitialized = true;
410
        }
411
    }
412

    
413

    
414

    
415
    /**
416
     * @param record
417
     * @param string
418
     * @return
419
     */
420
    private String getValueNd(HashMap<String, String> record, String string) {
421
        String value = getValue(record, string);
422
        if ("ND".equals(value)){
423
            return null;
424
        }else{
425
            return value;
426
        }
427
    }
428

    
429

    
430
    @Override
431
    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
432
//        IdCAT_AscendenteInmediato, IdCATRel, TipoRelacion
433
        HashMap<String, String> record = state.getOriginalRecord();
434
        String line = state.getCurrentLine() + ": ";
435

    
436
        String parentStr = getValue(record, "IdCAT_AscendenteInmediato");
437
        String relStr = getValue(record, "IdCATRel");
438

    
439
        String statusStr = getValue(record, "EstatusNombre");
440

    
441
        Classification classification = getClassification(state);
442
        String idCat = getValue(record, "IdCAT");
443
        TaxonBase<?> taxonBase = taxonIdMap.get(idCat);
444
        Taxon parent;
445
        if ("aceptado".equals(statusStr)){
446
            parent = (Taxon)taxonIdMap.get(parentStr);
447
            if (parent == null){
448
                logger.warn(line + "Parent is missing: "+ parentStr);
449
            }else{
450
                Taxon taxon = (Taxon)taxonBase;
451
                Reference relRef = null;  //TODO
452
                classification.addParentChild(parent, taxon, relRef, null);
453
                makeConceptRelation(line, taxon.getName());
454

    
455
            }
456
        }else if (statusStr.startsWith("sin")){
457
            parent = (Taxon)taxonIdMap.get(relStr);
458
            if (parent == null){
459
                logger.warn(line + "Accepted taxon is missing: "+ relStr);
460
            }else{
461
                Synonym synonym = (Synonym)taxonBase;
462
                parent.addSynonym(synonym, SynonymType.SYNONYM_OF());
463
                makeConceptRelation(line, synonym.getName());
464
            }
465
        }
466
    }
467

    
468
     /**
469
     * @param line
470
     * @param name
471
     */
472
    private void makeConceptRelation(String line, TaxonNameBase<?,?> name) {
473
        if (name.getTaxonBases().size()==2){
474
            Iterator<TaxonBase> it = name.getTaxonBases().iterator();
475
            Taxon taxon1 = getAccepted(it.next());
476
            Taxon taxon2 = getAccepted(it.next());
477
            Reference citation = null;
478
            TaxonRelationship rel;
479
            if (taxon1.getSec().getUuid().equals(MexicoConabioTransformer.uuidReferenceBorhidi)){
480
                rel = taxon1.addTaxonRelation(taxon2, TaxonRelationshipType.CONGRUENT_TO(),
481
                        citation, null);
482
            }else{
483
                rel = taxon2.addTaxonRelation(taxon1, TaxonRelationshipType.CONGRUENT_TO(),
484
                        citation, null);
485
            }
486
            rel.setDoubtful(true);
487
        }else if (name.getTaxonBases().size()>2){
488
            logger.warn(line + "Names with more than 2 taxa not yet handled");
489
        }
490

    
491
    }
492

    
493
    /**
494
     * @param next
495
     * @return
496
     */
497
    private Taxon getAccepted(TaxonBase<?> taxonBase) {
498
        if (taxonBase.isInstanceOf(Taxon.class)){
499
            return CdmBase.deproxy(taxonBase, Taxon.class);
500
        }else{
501
            Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
502
            return syn.getAcceptedTaxon();
503
        }
504
    }
505

    
506

    
507

    
508
    /**
509
     * @return
510
     */
511
    private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
512
        if (classification == null){
513
            MexicoConabioImportConfigurator config = state.getConfig();
514
            classification = Classification.NewInstance(config.getClassificationName());
515
            classification.setUuid(config.getClassificationUuid());
516
            classification.setReference(config.getSecReference());
517
            getClassificationService().save(classification);
518
        }
519
        return classification;
520
    }
521

    
522

    
523
    @Override
524
    protected boolean isIgnore(SimpleExcelTaxonImportState<CONFIG> state) {
525
        return ! state.getConfig().isDoTaxa();
526
    }
527
}
(6-6/9)