Project

General

Profile

Download (34.1 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.ArrayList;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.Iterator;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.api.service.config.SynonymDeletionConfigurator;
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
28
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
32
import eu.etaxonomy.cdm.model.name.Rank;
33
import eu.etaxonomy.cdm.model.name.TaxonName;
34
import eu.etaxonomy.cdm.model.reference.Reference;
35
import eu.etaxonomy.cdm.model.reference.ReferenceType;
36
import eu.etaxonomy.cdm.model.taxon.Synonym;
37
import eu.etaxonomy.cdm.model.taxon.SynonymType;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41
import eu.etaxonomy.cdm.model.term.DefinedTerm;
42
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
43

    
44
/**
45
 * @author a.mueller
46
 * @since 17.02.2020
47
 */
48
@Component
49
public class CaryoAizoaceaeExcelImport extends SimpleExcelTaxonImport<CaryoAizoaceaeExcelImportConfigurator>{
50

    
51
    private static final long serialVersionUID = -729761811965260921L;
52
    private static final Logger logger = Logger.getLogger(CaryoAizoaceaeExcelImport.class);
53

    
54
    private static final String ACCEPTED_PLANT_NAME_ID = "accepted_plant_name_id";
55
    private static final String NOMENCLATURAL_REMARKS = "nomenclatural_remarks";
56
    private static final String TAXON_RANK = "taxon_rank";
57
    private static final String NAME_CIT = "NameCit";
58
    private static final String KEW_NAME4CDM_LINK = "KewName4CDMLink";
59
    private static final String KEW_F_NAME4CDM_LINK = "KewFName4CDMLink";
60
    private static final String TAXON_STATUS = "taxon_status";
61
    private static final String PLANT_NAME_ID = "plant_name_id";
62
    private static final String IPNI_ID = "ipni_id";
63

    
64
    private Map<String, UUID> taxonMapping = new HashMap<>();
65
    private Reference secRef = null;
66
    private Set<String> neglectedRecords = new HashSet<>();
67
    private Set<UUID> createdNames = new HashSet<>();
68

    
69
    private SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state;
70
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> dedupHelper = null;
71

    
72
    @Override
73
    protected void firstPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
74
        int line = state.getCurrentLine();
75
        if ((line % 500) == 0){
76
            newTransaction(state);
77
            System.out.println(line);
78
        }
79

    
80
        this.state = state;
81
        Map<String, String> record = state.getOriginalRecord();
82

    
83
        String fullCitation = getValue(record, NAME_CIT);
84
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
85
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
86
        String status = getValue(record, TAXON_STATUS);
87
        String sourceId = getValue(record, PLANT_NAME_ID);
88
        String ipniId = getValue(record, IPNI_ID);
89
        String rankStr = getValue(record, TAXON_RANK);
90
        String nomenclaturalRemarks = getValue(record, NOMENCLATURAL_REMARKS);
91
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
92

    
93
        String row = String.valueOf(line) + "("+fullName+"): ";
94

    
95
        if("Misapplied".equals(status)){
96
            neglectedRecords.add(sourceId);
97
            return;
98
        }
99

    
100
        boolean isNewName = false;
101

    
102
        try {
103

    
104
            List<NomenclaturalStatusType> statusTypes = new ArrayList<>();
105
            Class<? extends CdmBase> clazz = makeStatus(status, sourceId, accId, row, statusTypes);
106

    
107
            TaxonName name;
108
            Rank rank = state.getTransformer().getRankByKey(rankStr);
109
            List<TaxonName> existingNames = getNameService().getNamesByNameCache(nameCache);
110
            Iterator<TaxonName> it = existingNames.iterator();
111
            while (it.hasNext()){
112
                TaxonName next = it.next();
113
                if (createdNames.contains(next.getUuid())){
114
                    it.remove();
115
                }
116
            }
117

    
118
            List<TaxonName> fullNameMatches = new ArrayList<>();
119

    
120
            @SuppressWarnings("rawtypes")
121
            List<TaxonBase> allFullNameTaxa = new ArrayList<>();
122
            @SuppressWarnings("rawtypes")
123
            List<TaxonBase> allNameCacheTaxa = new ArrayList<>();
124

    
125
            for (TaxonName existingName : existingNames){
126
                if (existingName.getTitleCache().equals(fullName)){
127
                    fullNameMatches.add(existingName);
128
                    allFullNameTaxa.addAll(existingName.getTaxonBases());
129
                }
130
                allNameCacheTaxa.addAll(existingName.getTaxonBases());
131
            }
132

    
133
            logMultipleCandidates(row, existingNames, fullNameMatches);
134

    
135
            TaxonBase<?> existingTaxon;
136
            if(allFullNameTaxa.size()>1){
137
                existingTaxon = findBestMatchingTaxon(allFullNameTaxa, clazz, row);
138
                name = existingTaxon.getName();
139
            }else if (allFullNameTaxa.size()==1){
140
                existingTaxon = allFullNameTaxa.iterator().next();
141
                name = existingTaxon.getName();
142
            }else{
143
                existingTaxon = null;
144
                if (!fullNameMatches.isEmpty()){
145
                    logger.warn(row + "None of the existing names exists as taxon/synonym. Existing name taken as base for new taxon/synonym created.");
146
                    if (fullNameMatches.size()>1){
147
                        logger.warn(row + "More than 1 matching full names exist as candidats for new taxon/synonym. Arbitrary one taken.");
148
                    }
149
                    name = fullNameMatches.iterator().next();
150
                }else if (!existingNames.isEmpty()){
151
                    if (!allNameCacheTaxa.isEmpty()){
152
                        logger.warn(row + "Taxa exist with matching nameCache but not matching fullname cache. New name and new taxon/synonym created. Other authors are " + getOtherAuthors(existingNames));
153
                        name = null;
154
                    }else{
155
                        logger.warn(row + "No matching fullnames exist but namecache matches. None of the matches is used in a taxon/synonym. Other authors are " + getOtherAuthors(existingNames));
156
                        name = null;
157
                    }
158
                }else{
159
                    name = null;
160
                }
161
            }
162

    
163
            if (existingTaxon == null){
164
                if (rank == null){
165
                    logger.warn(row + "Name has no rank " + nameCache);
166
                }else if (rank.equals(Rank.GENUS())){
167
                    logger.warn(row + "No name exists for genus " + nameCache + ". This is unexpected.");
168
                }
169
            }else{
170
                if (existingTaxon.isInstanceOf(Taxon.class)){
171
                    if (!CdmBase.deproxy(existingTaxon, Taxon.class).getTaxonNodes().isEmpty()){
172
                        neglectedRecords.add(sourceId);
173
                    }
174
                }else{
175
                    Taxon taxon = CdmBase.deproxy(existingTaxon, Synonym.class).getAcceptedTaxon();
176
                    if (taxon != null && !taxon.getTaxonNodes().isEmpty()){
177
                        neglectedRecords.add(sourceId);
178
                    }
179
                }
180
            }
181
            if (name == null){
182
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
183
                name = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
184
                if (name.isProtectedFullTitleCache() || name.isProtectedTitleCache() || name.isProtectedNameCache()
185
                        || name.isProtectedAuthorshipCache()){
186
                    logger.warn(row + "Name not parsable: " + fullCitation);
187
                    name.setTitleCache(fullName, true);
188
                    name.setNameCache(nameCache, true);
189
                }else{
190
                    testParsedName(state, name, row, null);
191
                }
192
                name.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
193
                getNameService().saveOrUpdate(name);
194
                isNewName = true;
195
                createdNames.add(name.getUuid());
196
                name = dedupliateNameParts(name);
197
            }else{
198
                testParsedName(state, name, row, fullCitation);
199
            }
200

    
201
            handleNomenclRemarkAndNameStatus(nomenclaturalRemarks, row, isNewName, name, statusTypes);
202

    
203
            TaxonBase<?> taxonBase = existingTaxon;
204

    
205
            if (taxonBase == null){
206
                if (clazz == Taxon.class){
207
                    taxonBase = Taxon.NewInstance(name, getSecRef());
208
                }else{
209
                    taxonBase = Synonym.NewInstance(name, getSecRef());
210
                }
211
                taxonBase.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
212
                getTaxonService().saveOrUpdate(taxonBase);
213
            }
214

    
215
            DefinedTerm ipniIdIdentifierType = DefinedTerm.IDENTIFIER_NAME_IPNI();
216
            name.addIdentifier(ipniId, ipniIdIdentifierType);
217

    
218
            taxonMapping.put(sourceId, taxonBase.getUuid());
219
//            if("Accepted".equals(status)){
220
            if(taxonBase.isInstanceOf(Taxon.class)){
221
                    UUID existingUuid = taxonMapping.put(name.getNameCache(), taxonBase.getUuid());
222
                if (existingUuid != null){
223
                    logger.warn(row + name.getNameCache() + " has multiple instances in file");
224
                }
225
            }
226
        } catch (UndefinedTransformerMethodException e) {
227
            e.printStackTrace();
228
        }
229
    }
230

    
231
    private TaxonName dedupliateNameParts(TaxonName name) {
232
        getDedupHelper().replaceAuthorNamesAndNomRef(state, name);
233
        return name;
234
    }
235

    
236
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> getDedupHelper() {
237
        if (dedupHelper == null){
238
            dedupHelper
239
                = ImportDeduplicationHelper.NewInstance(this, state);
240
        }
241
        return dedupHelper;
242
    }
243

    
244
    private String getOtherAuthors(List<TaxonName> otherNames) {
245
        String result = "";
246
        for (TaxonName name : otherNames){
247
            result = CdmUtils.concat(";", result, name.getAuthorshipCache());
248
        }
249
        return result;
250
    }
251

    
252
    private TaxonBase<?> findBestMatchingTaxon(@SuppressWarnings("rawtypes") List<TaxonBase> allFullNameTaxa,
253
            Class<? extends CdmBase> clazz, String row) {
254

    
255
        TaxonBase<?> result = null;
256
        TaxonBase<?> otherStatus = null;
257
        for (TaxonBase<?> taxonBase : allFullNameTaxa) {
258
            if (taxonBase.isInstanceOf(clazz)){
259
                if (result != null){
260
                    logger.warn(row + "More than 1 taxon with matching full name AND matching status exists. This is not further handled. Arbitrary one taken.");
261
                }
262
                result = taxonBase;
263
            }else{
264
                otherStatus = taxonBase;
265
            }
266
        }
267
        if (result == null && allFullNameTaxa.size()>1){
268
            logger.warn(row + "More than 1 taxon with matching fullname but NOT matching status exists. This is not further handled. Arbitrary one taken.");
269
        }
270
        return result == null? otherStatus :result ;
271
    }
272

    
273
    private void logMultipleCandidates(String row, List<TaxonName> existingNames, List<TaxonName> fullNameMatches) {
274
        if(fullNameMatches.size()>1){
275
            String message = row + "More than one name with matching full name exists in DB. Try to take best matching.";
276
            if (existingNames.size()>fullNameMatches.size()){
277
                message += " Additionally names with matching name cache exist.";
278
            }
279
            logger.warn(message);
280
        }else if (existingNames.size()>1){
281
            String message = row + "More than one name with matching nameCache exists in DB. ";
282
            if(fullNameMatches.isEmpty()){
283
                message += "But none matches full name.";
284
            }else{
285
                message += "But exactly 1 matches full name.";
286
            }
287
            logger.warn(message);
288
        }
289
    }
290

    
291
    private Class<? extends CdmBase> makeStatus(String status, String sourceId,
292
            String accId, String row, List<NomenclaturalStatusType> statusTypes) {
293

    
294
        Class<? extends CdmBase> clazz;
295
        if ("Accepted".equals(status) || "Unplaced".equals(status) || "Artificial Hybrid".equals(status) ){
296
            clazz = Taxon.class;
297
        }else if ("Synonym".equals(status) || "Orthographic".equals(status)){
298
            clazz = (accId == null)? Taxon.class : Synonym.class;
299
            if("Orthographic".equals(status)){
300
                statusTypes.add(NomenclaturalStatusType.SUPERFLUOUS());
301
//                addStatus(NomenclaturalStatusType.SUPERFLUOUS(), row, isNewName, statusAdded, statusTypes, null);
302
            }
303
        }else if("Illegitimate".equals(status)){
304
            clazz = getIllegInvalidStatus(sourceId, accId);
305
            statusTypes.add(NomenclaturalStatusType.ILLEGITIMATE());
306
//            addStatus(NomenclaturalStatusType.ILLEGITIMATE(), row, isNewName, statusAdded, statusTypes, getSecRef());
307
        }else if ("Invalid".equals(status)){
308
            clazz = getIllegInvalidStatus(sourceId, accId);
309
            statusTypes.add(NomenclaturalStatusType.INVALID());
310
//            addStatus(NomenclaturalStatusType.INVALID(), row, isNewName, statusAdded, statusTypes, getSecRef());
311
        }else{
312
            logger.warn(row + "Unhandled status: " + status);
313
            clazz = Taxon.class;  //to do something
314
        }
315
        return clazz;
316
    }
317

    
318
    private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name,
319
            List<NomenclaturalStatusType> statusTypes) {
320

    
321
        NomenclaturalStatusType remarkType = null;
322
        NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next();
323
        if (nomenclaturalRemarks == null){
324
           //nothing to do
325
        }else if (", nom. illeg.".equals(nomenclaturalRemarks)){
326
            remarkType = NomenclaturalStatusType.ILLEGITIMATE();
327
        }else if (", nom. cons.".equals(nomenclaturalRemarks)){
328
            remarkType = NomenclaturalStatusType.CONSERVED();
329
        }else if (", nom. nud.".equals(nomenclaturalRemarks)){
330
            remarkType = NomenclaturalStatusType.NUDUM();
331
        }else if (", nom. provis.".equals(nomenclaturalRemarks)){
332
            remarkType = NomenclaturalStatusType.PROVISIONAL();
333
        }else if (", nom. rej.".equals(nomenclaturalRemarks)){
334
            remarkType = NomenclaturalStatusType.REJECTED();
335
        }else if (", nom. subnud.".equals(nomenclaturalRemarks)){
336
            remarkType = NomenclaturalStatusType.SUBNUDUM();
337
        }else if (", nom. superfl.".equals(nomenclaturalRemarks)){
338
            remarkType = NomenclaturalStatusType.SUPERFLUOUS();
339
        }else if (", not validly publ.".equals(nomenclaturalRemarks)){
340
            statusTypes.add(NomenclaturalStatusType.INVALID());
341
        }else if (", opus utique oppr.".equals(nomenclaturalRemarks)){
342
            statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR());
343
        }else {
344
            logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks);
345
        }
346

    
347
        NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType;
348
        if (isNewName){
349
            if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
350
                logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken.");
351
            }
352
            if (kewType != null){
353
                name.addStatus(kewType, getSecRef(), null);
354
            }
355
        }else{
356
            NomenclaturalStatusType existingType = null;
357
            if (!name.getStatus().isEmpty()){
358
                existingType = name.getStatus().iterator().next().getType();
359
            }
360
            if (existingType != null && kewType != null){
361
                if (!existingType.equals(kewType)){
362
                    logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored");
363
                }
364
            }else if (existingType != null && kewType == null){
365
                logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept.");
366
            }else if (existingType == null && kewType != null){
367
                if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
368
                    logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field).");
369
                }else{
370
                    logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored.");
371
                }
372
            }
373
        }
374
    }
375

    
376
    private void newTransaction(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
377
        commitTransaction(state.getTransactionStatus());
378
        secRef = null;
379
        dedupHelper = null;
380
        System.gc();
381
        state.setTransactionStatus(startTransaction());
382
    }
383

    
384
    private Reference getSecRef() {
385
        if (secRef == null){
386
            secRef = getReferenceService().find(state.getConfig().getSecUuid());
387
        }
388
        return secRef;
389
    }
390

    
391
    private Class<? extends CdmBase> getIllegInvalidStatus(String sourceId, String accId) {
392
        if (sourceId.equals(accId)){
393
            return Taxon.class;
394
        }else if(accId != null){
395
            return Synonym.class;
396
        }
397
        return null;
398
    }
399

    
400

    
401
    private void testParsedName(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state, TaxonName name,
402
            String row, String fullCitation) throws UndefinedTransformerMethodException {
403
        Map<String, String> record = state.getOriginalRecord();
404

    
405
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
406
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
407
        String rankStr = getValue(record, TAXON_RANK);
408
        String genusHybrid = getValue(record, "genus_hybrid");
409
        String genus = getValue(record, "genus");
410
        String speciesHybrid = getValue(record, "species_hybrid");
411
        String species = getValue(record, "species");
412
        String infraSpecRank = getValue(record, "infraspecific_rank");
413
        String infraspecies = getValue(record, "infraspecies");
414
        String basionymAuthor = getValue(record, "parenthetical_author");
415
        String combinationAuthor = getValue(record, "primary_author");
416
        String authors = getValue(record, "taxon_authors");
417
        String year = getValue(record, "KewYear4CDM");
418
        String pubType = getValue(record, "PubType");
419
        String place_of_publication = getValue(record, "place_of_publication");
420
        String volume_and_page = getValue(record, "volume_and_page");
421

    
422
        if (!CdmUtils.nullSafeEqual(name.getNameCache(), nameCache)){
423
            logger.warn(row + "Unexpected nameCache: " + nameCache);
424
        }
425
        if (!CdmUtils.nullSafeEqual(name.getTitleCache(), fullName)){
426
            logger.warn(row + "Unexpected titleCache: <->" + name.getTitleCache());
427
        }
428
        if (isBlank(genusHybrid) == name.isMonomHybrid()){
429
            logger.warn(row + "Unexpected genus hybrid: " + genusHybrid);
430
        }
431
        if (!CdmUtils.nullSafeEqual(name.getGenusOrUninomial(),genus)){
432
            logger.warn(row + "Unexpected genus: " + genus);
433
        }if (isBlank(speciesHybrid) == name.isBinomHybrid()){
434
            logger.warn(row + "Unexpected species hybrid: " + speciesHybrid);
435
        }
436
        if (!CdmUtils.nullSafeEqual(name.getSpecificEpithet(),species)){
437
            logger.warn(row + "Unexpected species epithet: " + name.getSpecificEpithet() +"<->"+ species);
438
        }
439
        if (!CdmUtils.nullSafeEqual(name.getInfraSpecificEpithet(), infraspecies)){
440
            logger.warn(row + "Unexpected infraspecific epithet: " + name.getInfraSpecificEpithet() +"<->"+ infraspecies);
441
        }
442
        if (!CdmUtils.nullSafeEqual(name.getAuthorshipCache(),authors)){
443
            logger.warn(row + "Unexpected authors: " + name.getAuthorshipCache() +"<->"+ authors);
444
        }
445
        Rank rank = state.getTransformer().getRankByKey(rankStr);
446
        if (!rank.equals(name.getRank())){
447
            logger.warn(row + "Unexpected rank: " + rankStr);
448
        }
449

    
450
        Reference nomRef = name.getNomenclaturalReference();
451
        if (nomRef == null){
452
            if (fullCitation != null){
453
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
454
                TaxonName parsedName = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
455
                if (parsedName.getNomenclaturalReference() != null){
456
                    name.setNomenclaturalReference(parsedName.getNomenclaturalReference());
457
                    logger.warn(row + "Nom.ref. was missing. Taken from Kew");
458
                }else{
459
                    logger.warn(row + "Nom. ref. is missing or can not be parsed");
460
                }
461
            }else{
462
                logger.warn(row + "NomRef is missing.");
463
            }
464
        }else{
465
            if ("A".equals(pubType) && nomRef.getType() != ReferenceType.Article){
466
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
467
            }
468
            if ("B".equals(pubType) && nomRef.getType() != ReferenceType.Book){
469
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
470
            }
471
            year = normalizeYear(year);
472
            if (!CdmUtils.nullSafeEqual(year, nomRef.getDatePublishedString())){
473
                logger.warn(row + "Unexpected year: " + year + "<->" + nomRef.getDatePublishedString());
474
            }
475
            if (!name.getFullTitleCache().contains(volume_and_page)){
476
                logger.warn(row + "volume_and_page not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ volume_and_page);
477
            }
478
            if (!name.getFullTitleCache().contains(place_of_publication)){
479
                logger.warn(row + "place_of_publication not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ place_of_publication);
480
            }
481
        }
482
        if ("subsp.".equals(infraSpecRank) && !rank.equals(Rank.SUBSPECIES())){
483
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
484
        }else if ("var.".equals(infraSpecRank) && !rank.equals(Rank.VARIETY())){
485
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
486
        }else if ("f.".equals(infraSpecRank) && !rank.equals(Rank.FORM())){
487
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
488
        }
489

    
490
    }
491

    
492
    private String normalizeYear(String year) {
493
        if (year == null){
494
            return null;
495
        }else if (year.contains("\" [")){
496
            String[] split = year.split("\" \\[");
497
            year = split[1].replace("]","") + " [" + split[0]+"\"]";
498
        }else if ("?".equals(year)){
499
            return null;
500
        }
501
        return year;
502
    }
503

    
504
    @Override
505
    protected void secondPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
506
        Map<String, String> record = state.getOriginalRecord();
507
        int line = state.getCurrentLine();
508
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
509
        String status = getValue(record, TAXON_STATUS);
510
        String sourceId = getValue(record, PLANT_NAME_ID);
511
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
512
        String accName = getValue(record, "AcceptedName");
513
        String basionymId = getValue(record, "basionym_plant_name_id");
514
        String homotypicSynonym = getValue(record, "homotypic_synonym");
515

    
516
        String row = String.valueOf(line) + "("+fullName+"): ";
517
        try {
518
            if ((line % 100) == 0){
519
                newTransaction(state);
520
                System.out.println(line);
521
            }
522

    
523
            if("Misapplied".equals(status)){
524
                return;
525
            }else if (neglectedRecords.contains(sourceId)){
526
                logger.info(row + "Record ignored.");
527
                return;
528
            }
529

    
530
            UUID uuid = taxonMapping.get(sourceId);
531
            TaxonBase<?> taxonBase = getTaxonService().find(uuid);
532
            if (taxonBase == null){
533
                logger.warn(row + "taxonBase not found: " + sourceId);
534
                return;
535
            }
536

    
537
            UUID accUuid = taxonMapping.get(accId);
538
            boolean hasAccepted = !sourceId.equals(accId);
539

    
540
            Taxon accTaxon = null;
541
            TaxonNode parent = null;
542
            Taxon child = null;
543
            Synonym syn = null;
544
            boolean isSynonymAccepted = false;
545

    
546
            if(accId == null){
547
                logger.info(row + "accID is null");
548
                child = CdmBase.deproxy(taxonBase, Taxon.class);
549
            }else if(hasAccepted){
550
                TaxonBase<?> accTaxonBase = getTaxonService().find(accUuid);
551
                if (accTaxonBase == null){
552
                    logger.warn(row + "acctaxon not found: " + accId + "; " + accName);
553
                }else if(!accTaxonBase.isInstanceOf(Taxon.class)){
554
                    logger.warn(row + "acctaxon is synonym: " + accId + "; " + accName);
555
                    isSynonymAccepted = true;
556
                }else{
557
                    accTaxon = CdmBase.deproxy(accTaxonBase, Taxon.class);
558
                    if (!accTaxon.getName().getTitleCache().equals(accName)){
559
                        logger.warn(row + "Accepted name differs: " + accName +" <-> "+ accTaxon.getName().getTitleCache());
560
                    }
561
                }
562
            }else if (sourceId.equals(accId)){
563
                if (!taxonBase.isInstanceOf(Taxon.class)){
564
                    logger.warn(row + "child not of class Taxon: " + sourceId);
565
                }else{
566
                    Rank rank = taxonBase.getName().getRank();
567
                    child = CdmBase.deproxy(taxonBase, Taxon.class);
568
                    if(rank.equals(Rank.GENUS())){
569
                        parent = getFamily();
570
                    }else if (rank.equals(Rank.SPECIES())){
571
                        String genus = child.getName().getGenusOrUninomial();
572
                        UUID parentUuid = taxonMapping.get(genus);
573
                        parent = getParent(parentUuid, row);
574
                    }else if (rank.isLower(Rank.SPECIES())){
575
                        String speciesName = child.getName().getGenusOrUninomial() + " " + child.getName().getSpecificEpithet();
576
                        UUID parentUuid = taxonMapping.get(speciesName);
577
                        parent = getParent(parentUuid, row);
578
                    }
579
                }
580
            }
581

    
582
            if (taxonBase.isInstanceOf(Synonym.class)){
583
                syn = CdmBase.deproxy(taxonBase, Synonym.class);
584
            }
585

    
586
            if ("Accepted".equals(status)){
587
                if (parent == null){
588
                    logger.warn(row + "Parent is missing. Taxon is moved to 'unresolved' instead'");
589
                    parent = unresolvedParent();
590
                }
591
                if (child == null){
592
                    logger.warn(row + "Child is missing. Taxon not imported.");
593
                }else{
594
                    if (!child.getTaxonNodes().isEmpty()){
595
                        if(!child.getName().getRank().equals(Rank.GENUS())){
596
                            logger.warn(row + "Taxon already has a parent. Taxon not attached to any further parent taxon.");
597
                        }
598
                    }else{
599
                        addChild(parent, child, row);
600
                    }
601
                }
602
            }else if ("Synonym".equals(status)){
603
                if(accTaxon == null){
604
                    if(isSynonymAccepted){
605
                        logger.warn(row +  "Synonym added to 'unresolved' as accepted taxon is synonym itself.");
606
                    }else if (accId != null){
607
                        logger.warn(row +  "Accepted taxon for synonym unexpectedly does not exist (it seems not to be a synonym itself). Synonym moved to 'unresolved'");
608
                    }else{
609
                        logger.warn(row +  "No accepted taxon given for synonym. Therefore taxon moved to 'unresolved'");
610
                    }
611
                    if(accId != null){
612
                        child = Taxon.NewInstance(syn.getName(), syn.getSec());
613
                    }
614
                    addChild(unresolvedParent(), child, row);
615
                    getTaxonService().deleteSynonym(syn, new SynonymDeletionConfigurator());
616
                }else{
617
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
618
                }
619
            }else if ("Unplaced".equals(status)){
620
                parent = unresolvedParent();
621
                addChild(parent, child, row);
622
            }else if ("Artificial Hybrid".equals(status)){
623
                parent = hybridParent();
624
                addChild(parent, child, row);
625
            }else if ("Orthographic".equals(status)){
626
                if(accTaxon == null){
627
                    logger.warn(row + "'Orthographic' taxon has no acc taxon");
628
                }else{
629
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
630
                }
631
            }else if("Illegitimate".equals(status) || "Invalid".equals(status)){
632
                if (hasAccepted){
633
                    if(accTaxon == null){
634
                        logger.warn(row + "accepted taxon for illegitimate or invalid taxon not found");
635
                    }else{
636
                        accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
637
                    }
638
                }else{
639
                    addChild(unresolvedParent(), child, row);
640
                }
641
            }else{
642
                logger.warn(row + "Unhandled status: " +  status);
643
            }
644

    
645
            if (basionymId != null){
646
                UUID basionymUuid = taxonMapping.get(basionymId);
647
                TaxonBase<?> basionymTaxon = getTaxonService().find(basionymUuid);
648
                if (basionymTaxon != null){
649
                    if (hasSameAcceptedTaxon(taxonBase, basionymTaxon)){
650
                        if (taxonBase.getName().getBasionym() == null){
651
                            taxonBase.getName().addBasionym(basionymTaxon.getName());
652
                        }
653
                    }else{
654
                        logger.warn(row + "Basionym has not same accepted taxon and therefore was ignored.");
655
                    }
656
                }else{
657
                    logger.warn(row + "Basionym "+basionymId+" not found.");
658
                }
659
            }
660
        } catch (Exception e) {
661
            logger.error(row + "Error.");
662
            e.printStackTrace();
663
        }
664
    }
665

    
666
    private boolean hasSameAcceptedTaxon(TaxonBase<?> taxonBase, TaxonBase<?> basionymTaxon) {
667
        if (taxonBase.isInstanceOf(Synonym.class)){
668
            taxonBase = CdmBase.deproxy(taxonBase, Synonym.class);
669
        }
670
        if (basionymTaxon.isInstanceOf(Synonym.class)){
671
            basionymTaxon = CdmBase.deproxy(basionymTaxon, Synonym.class);
672
        }
673
        return taxonBase.equals(basionymTaxon);
674
    }
675

    
676
    private TaxonNode getParent(UUID parentUuid, String row) {
677
        if(parentUuid == null){
678
            logger.warn(row + "Parent uuid is null. No parent found.");
679
            return null;
680
        }
681
        TaxonBase<?> pTaxon = getTaxonService().find(parentUuid);
682
        if (pTaxon == null){
683
            logger.warn(row + "No parent found for parent UUID. This should not happen.");
684
            return null;
685
        }
686
        if (pTaxon.isInstanceOf(Synonym.class)){
687
            logger.warn(row + "Parent is synonym");
688
            return null;
689
        }else{
690
            Taxon ptax = CdmBase.deproxy(pTaxon, Taxon.class);
691
            if(ptax.getTaxonNodes().isEmpty()){
692
                logger.warn(row + "Parent has no node yet");
693
                return null;
694
            }else {
695
                if(ptax.getTaxonNodes().size()>1){
696
                    logger.warn("Parent has >1 nodes. Take arbitrary one");
697
                }
698
                return ptax.getTaxonNodes().iterator().next();
699
            }
700
        }
701
    }
702

    
703
    private void addChild(TaxonNode parent, Taxon child, String row) {
704
        if (parent == null){
705
            logger.warn(row + "Parent is null");
706
        }else if (child == null){
707
            logger.warn(row + "Child is null");
708
        }else{
709
            if (!child.getTaxonNodes().isEmpty()){
710
                TaxonNode childNode = child.getTaxonNodes().iterator().next();
711
                if (childNode.getParent() != null && childNode.getParent().equals(parent)){
712
                    logger.info(row + "Parent-child relation exists already.");
713
                }else{
714
                    logger.warn(row + "Child already has different parent. Parent-child relation not added.");
715
                }
716
            }else{
717
                TaxonNode node = parent.addChildTaxon(child, null, null);
718
                getTaxonNodeService().saveOrUpdate(node);
719
            }
720
        }
721
    }
722

    
723
    private TaxonNode getFamily(){
724
        UUID uuid = UUID.fromString("0334809a-aa20-447d-add9-138194f80f56");
725
        TaxonNode aizoaceae = getTaxonNodeService().find(uuid);
726
        return aizoaceae;
727
    }
728

    
729
    private TaxonNode hybridParent(){
730
        UUID uuid = UUID.fromString("2fae0fa1-758a-4fcb-bb6c-a2bd11f40641");
731
        TaxonNode hybridParent = getTaxonNodeService().find(uuid);
732
        return hybridParent;
733
    }
734
    private TaxonNode unresolvedParent(){
735
        UUID uuid = UUID.fromString("accb1ff6-5748-4b18-b529-9368c331a38d");
736
        TaxonNode unresolvedParent = getTaxonNodeService().find(uuid);
737
        return unresolvedParent;
738
    }
739
}
(1-1/7)