Project

General

Profile

Download (35.3 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.ArrayList;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.Iterator;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.logging.log4j.LogManager;
21
import org.apache.logging.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.api.service.config.SynonymDeletionConfigurator;
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
28
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
32
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.name.RankClass;
35
import eu.etaxonomy.cdm.model.name.TaxonName;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37
import eu.etaxonomy.cdm.model.reference.ReferenceType;
38
import eu.etaxonomy.cdm.model.taxon.Synonym;
39
import eu.etaxonomy.cdm.model.taxon.SynonymType;
40
import eu.etaxonomy.cdm.model.taxon.Taxon;
41
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
43
import eu.etaxonomy.cdm.model.term.IdentifierType;
44
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
45

    
46
/**
47
 * @author a.mueller
48
 * @since 17.02.2020
49
 */
50
@Component
51
public class CaryoAizoaceaeExcelImport extends SimpleExcelTaxonImport<CaryoAizoaceaeExcelImportConfigurator>{
52

    
53
    private static final long serialVersionUID = -729761811965260921L;
54
    private static final Logger logger = LogManager.getLogger();
55

    
56
    private static final String ACCEPTED_PLANT_NAME_ID = "accepted_plant_name_id";
57
    private static final String NOMENCLATURAL_REMARKS = "nomenclatural_remarks";
58
    private static final String TAXON_RANK = "taxon_rank";
59
    private static final String NAME_CIT = "NameCit";
60
    private static final String KEW_NAME4CDM_LINK = "KewName4CDMLink";
61
    private static final String KEW_F_NAME4CDM_LINK = "KewFName4CDMLink";
62
    private static final String TAXON_STATUS = "taxon_status";
63
    private static final String PLANT_NAME_ID = "plant_name_id";
64
    private static final String IPNI_ID = "ipni_id";
65

    
66
    private Map<String, UUID> taxonMapping = new HashMap<>();
67
    private Reference secRef = null;
68
    private Set<String> neglectedRecords = new HashSet<>();
69
    private Set<UUID> createdNames = new HashSet<>();
70

    
71
    private SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state;
72

    
73
    @Override
74
    protected void firstPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
75
        int line = state.getCurrentLine();
76
        if ((line % 500) == 0){
77
            newTransaction(state);
78
            System.out.println(line);
79
        }
80

    
81
        this.state = state;
82
        Map<String, String> record = state.getOriginalRecord();
83

    
84
        String fullCitation = getValue(record, NAME_CIT);
85
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
86
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
87
        String status = getValue(record, TAXON_STATUS);
88
        String sourceId = getValue(record, PLANT_NAME_ID);
89
        String ipniId = getValue(record, IPNI_ID);
90
        String rankStr = getValue(record, TAXON_RANK);
91
        String nomenclaturalRemarks = getValue(record, NOMENCLATURAL_REMARKS);
92
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
93

    
94
        String row = String.valueOf(line) + "("+fullName+"): ";
95

    
96
        if("Misapplied".equals(status)){
97
            neglectedRecords.add(sourceId);
98
            return;
99
        }
100

    
101
        boolean isNewName = false;
102

    
103
        try {
104

    
105
            List<NomenclaturalStatusType> statusTypes = new ArrayList<>();
106
            Class<? extends CdmBase> clazz = makeStatus(status, sourceId, accId, row, statusTypes);
107

    
108
            TaxonName name;
109
            Rank rank = state.getTransformer().getRankByKey(rankStr);
110
            List<TaxonName> existingNames = getNameService().getNamesByNameCache(nameCache);
111
            Iterator<TaxonName> it = existingNames.iterator();
112
            while (it.hasNext()){
113
                TaxonName next = it.next();
114
                if (createdNames.contains(next.getUuid())){
115
                    it.remove();
116
                }
117
            }
118

    
119
            List<TaxonName> fullNameMatches = new ArrayList<>();
120

    
121
            @SuppressWarnings("rawtypes")
122
            List<TaxonBase> allFullNameTaxa = new ArrayList<>();
123
            @SuppressWarnings("rawtypes")
124
            List<TaxonBase> allNameCacheTaxa = new ArrayList<>();
125

    
126
            for (TaxonName existingName : existingNames){
127
                if (existingName.getTitleCache().equals(fullName)){
128
                    fullNameMatches.add(existingName);
129
                    allFullNameTaxa.addAll(existingName.getTaxonBases());
130
                }
131
                allNameCacheTaxa.addAll(existingName.getTaxonBases());
132
            }
133

    
134
            logMultipleCandidates(row, existingNames, fullNameMatches);
135

    
136
            TaxonBase<?> existingTaxon;
137
            if(allFullNameTaxa.size()>1){
138
                existingTaxon = findBestMatchingTaxon(allFullNameTaxa, clazz, row);
139
                name = existingTaxon.getName();
140
            }else if (allFullNameTaxa.size()==1){
141
                existingTaxon = allFullNameTaxa.iterator().next();
142
                name = existingTaxon.getName();
143
            }else{
144
                existingTaxon = null;
145
                if (!fullNameMatches.isEmpty()){
146
                    logger.warn(row + "None of the existing names exists as taxon/synonym. Existing name taken as base for new taxon/synonym created.");
147
                    if (fullNameMatches.size()>1){
148
                        logger.warn(row + "More than 1 matching full names exist as candidats for new taxon/synonym. Arbitrary one taken.");
149
                    }
150
                    name = fullNameMatches.iterator().next();
151
                }else if (!existingNames.isEmpty()){
152
                    if (!allNameCacheTaxa.isEmpty()){
153
                        logger.warn(row + "Taxa exist with matching nameCache but not matching fullname cache. New name and new taxon/synonym created. Other authors are " + getOtherAuthors(existingNames));
154
                        name = null;
155
                    }else{
156
                        logger.warn(row + "No matching fullnames exist but namecache matches. None of the matches is used in a taxon/synonym. Other authors are " + getOtherAuthors(existingNames));
157
                        name = null;
158
                    }
159
                }else{
160
                    name = null;
161
                }
162
            }
163

    
164
            if (existingTaxon == null){
165
                if (rank == null){
166
                    logger.warn(row + "Name has no rank " + nameCache);
167
                }else if (rank.equals(Rank.GENUS())){
168
                    logger.warn(row + "No name exists for genus " + nameCache + ". This is unexpected.");
169
                }
170
            }else{
171
                if (existingTaxon.isInstanceOf(Taxon.class)){
172
                    if (!CdmBase.deproxy(existingTaxon, Taxon.class).getTaxonNodes().isEmpty()){
173
                        neglectedRecords.add(sourceId);
174
                    }
175
                }else{
176
                    Taxon taxon = CdmBase.deproxy(existingTaxon, Synonym.class).getAcceptedTaxon();
177
                    if (taxon != null && !taxon.getTaxonNodes().isEmpty()){
178
                        neglectedRecords.add(sourceId);
179
                    }
180
                }
181
            }
182
            if (name == null){
183
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
184
                name = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
185
                if (name.isProtectedFullTitleCache() || name.isProtectedTitleCache() || name.isProtectedNameCache()
186
                        || name.isProtectedAuthorshipCache()){
187
                    logger.warn(row + "Name not parsable: " + fullCitation);
188
                    name.setTitleCache(fullName, true);
189
                    name.setNameCache(nameCache, true);
190
                }else{
191
                    testParsedName(state, name, row, null);
192
                }
193
                name.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
194
                name = dedupliateNameParts(name);
195
                getNameService().saveOrUpdate(name);
196
                isNewName = true;
197
                createdNames.add(name.getUuid());
198
            }else{
199
                testParsedName(state, name, row, fullCitation);
200
            }
201

    
202
            handleNomenclRemarkAndNameStatus(nomenclaturalRemarks, row, isNewName, name, statusTypes);
203

    
204
            TaxonBase<?> taxonBase = existingTaxon;
205

    
206
            if (taxonBase == null){
207
                if (clazz == Taxon.class){
208
                    taxonBase = Taxon.NewInstance(name, getSecRef());
209
                }else{
210
                    taxonBase = Synonym.NewInstance(name, getSecRef());
211
                }
212
                taxonBase.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
213
                getTaxonService().saveOrUpdate(taxonBase);
214
            }
215

    
216
            if (!isBlank(ipniId)){
217
                IdentifierType ipniIdIdentifierType = IdentifierType.IDENTIFIER_NAME_IPNI();
218
                name.addIdentifier(ipniId, ipniIdIdentifierType);
219
            }else{
220
                logger.warn(row + "IPNI id is missing.");
221
            }
222

    
223
            taxonMapping.put(sourceId, taxonBase.getUuid());
224
//            if("Accepted".equals(status)){
225
            if(taxonBase.isInstanceOf(Taxon.class)){
226
                    UUID existingUuid = taxonMapping.put(name.getNameCache(), taxonBase.getUuid());
227
                if (existingUuid != null){
228
                    logger.warn(row + name.getNameCache() + " has multiple instances in file");
229
                }
230
            }
231
        } catch (UndefinedTransformerMethodException e) {
232
            e.printStackTrace();
233
        }
234
    }
235

    
236
    private TaxonName dedupliateNameParts(TaxonName name) {
237
        if (state.getConfig().isDoDeduplicate()){
238
            state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
239
        }
240
        return name;
241
    }
242

    
243
    private String getOtherAuthors(List<TaxonName> otherNames) {
244
        String result = "";
245
        for (TaxonName name : otherNames){
246
            result = CdmUtils.concat(";", result, name.getAuthorshipCache());
247
        }
248
        return result;
249
    }
250

    
251
    private TaxonBase<?> findBestMatchingTaxon(@SuppressWarnings("rawtypes") List<TaxonBase> allFullNameTaxa,
252
            Class<? extends CdmBase> clazz, String row) {
253

    
254
        TaxonBase<?> result = null;
255
        TaxonBase<?> otherStatus = null;
256
        for (TaxonBase<?> taxonBase : allFullNameTaxa) {
257
            if (taxonBase.isInstanceOf(clazz)){
258
                if (result != null){
259
                    logger.warn(row + "More than 1 taxon with matching full name AND matching status exists. This is not further handled. Arbitrary one taken.");
260
                }
261
                result = taxonBase;
262
            }else{
263
                otherStatus = taxonBase;
264
            }
265
        }
266
        if (result == null && allFullNameTaxa.size()>1){
267
            logger.warn(row + "More than 1 taxon with matching fullname but NOT matching status exists. This is not further handled. Arbitrary one taken.");
268
        }
269
        return result == null? otherStatus :result ;
270
    }
271

    
272
    private void logMultipleCandidates(String row, List<TaxonName> existingNames, List<TaxonName> fullNameMatches) {
273
        if(fullNameMatches.size()>1){
274
            String message = row + "More than one name with matching full name exists in DB. Try to take best matching.";
275
            if (existingNames.size()>fullNameMatches.size()){
276
                message += " Additionally names with matching name cache exist.";
277
            }
278
            logger.warn(message);
279
        }else if (existingNames.size()>1){
280
            String message = row + "More than one name with matching nameCache exists in DB. ";
281
            if(fullNameMatches.isEmpty()){
282
                message += "But none matches full name.";
283
            }else{
284
                message += "But exactly 1 matches full name.";
285
            }
286
            logger.warn(message);
287
        }
288
    }
289

    
290
    private Class<? extends CdmBase> makeStatus(String status, String sourceId,
291
            String accId, String row, List<NomenclaturalStatusType> statusTypes) {
292

    
293
        Class<? extends CdmBase> clazz;
294
        if ("Accepted".equals(status) || "Unplaced".equals(status) || "Artificial Hybrid".equals(status) ){
295
            clazz = Taxon.class;
296
        }else if ("Synonym".equals(status) || "Orthographic".equals(status)){
297
            clazz = (accId == null)? Taxon.class : Synonym.class;
298
            if("Orthographic".equals(status)){
299
                statusTypes.add(NomenclaturalStatusType.SUPERFLUOUS());
300
//                addStatus(NomenclaturalStatusType.SUPERFLUOUS(), row, isNewName, statusAdded, statusTypes, null);
301
            }
302
        }else if("Illegitimate".equals(status)){
303
            clazz = getIllegInvalidStatus(sourceId, accId);
304
            statusTypes.add(NomenclaturalStatusType.ILLEGITIMATE());
305
//            addStatus(NomenclaturalStatusType.ILLEGITIMATE(), row, isNewName, statusAdded, statusTypes, getSecRef());
306
        }else if ("Invalid".equals(status)){
307
            clazz = getIllegInvalidStatus(sourceId, accId);
308
            statusTypes.add(NomenclaturalStatusType.INVALID());
309
//            addStatus(NomenclaturalStatusType.INVALID(), row, isNewName, statusAdded, statusTypes, getSecRef());
310
        }else{
311
            logger.warn(row + "Unhandled status: " + status);
312
            clazz = Taxon.class;  //to do something
313
        }
314
        return clazz;
315
    }
316

    
317
    private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name,
318
            List<NomenclaturalStatusType> statusTypes) {
319

    
320
        NomenclaturalStatusType remarkType = null;
321
        NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next();
322
        if (nomenclaturalRemarks == null){
323
           //nothing to do
324
        }else if (", nom. illeg.".equals(nomenclaturalRemarks)){
325
            remarkType = NomenclaturalStatusType.ILLEGITIMATE();
326
        }else if (", nom. cons.".equals(nomenclaturalRemarks)){
327
            remarkType = NomenclaturalStatusType.CONSERVED();
328
        }else if (", nom. nud.".equals(nomenclaturalRemarks)){
329
            remarkType = NomenclaturalStatusType.NUDUM();
330
        }else if (", nom. provis.".equals(nomenclaturalRemarks)){
331
            remarkType = NomenclaturalStatusType.PROVISIONAL();
332
        }else if (", nom. rej.".equals(nomenclaturalRemarks)){
333
            remarkType = NomenclaturalStatusType.REJECTED();
334
        }else if (", nom. subnud.".equals(nomenclaturalRemarks)){
335
            remarkType = NomenclaturalStatusType.SUBNUDUM();
336
        }else if (", nom. superfl.".equals(nomenclaturalRemarks)){
337
            remarkType = NomenclaturalStatusType.SUPERFLUOUS();
338
        }else if (", not validly publ.".equals(nomenclaturalRemarks)){
339
            statusTypes.add(NomenclaturalStatusType.INVALID());
340
        }else if (", opus utique oppr.".equals(nomenclaturalRemarks)){
341
            statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR());
342
        }else {
343
            logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks);
344
        }
345

    
346
        NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType;
347
        if (isNewName){
348
            if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
349
                logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken.");
350
            }
351
            if (kewType != null){
352
                name.addStatus(kewType, getSecRef(), null);
353
            }
354
        }else{
355
            NomenclaturalStatusType existingType = null;
356
            if (!name.getStatus().isEmpty()){
357
                existingType = name.getStatus().iterator().next().getType();
358
            }
359
            if (existingType != null && kewType != null){
360
                if (!existingType.equals(kewType)){
361
                    logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored");
362
                }
363
            }else if (existingType != null && kewType == null){
364
                logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept.");
365
            }else if (existingType == null && kewType != null){
366
                if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
367
                    logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field).");
368
                }else{
369
                    logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored.");
370
                }
371
            }
372
        }
373
    }
374

    
375
    private void newTransaction(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
376
        commitTransaction(state.getTransactionStatus());
377
        secRef = null;
378
        state.getDeduplicationHelper().reset();
379
        state.setSourceReference(null);
380
        System.gc();
381
        state.setTransactionStatus(startTransaction());
382
    }
383

    
384
    private Reference getSecRef() {
385
        if (secRef == null){
386
            secRef = getReferenceService().find(state.getConfig().getSecUuid());
387
        }
388
        return secRef;
389
    }
390

    
391
    private Class<? extends CdmBase> getIllegInvalidStatus(String sourceId, String accId) {
392
        if (sourceId.equals(accId)){
393
            return Taxon.class;
394
        }else if(accId != null){
395
            return Synonym.class;
396
        }
397
        return null;
398
    }
399

    
400

    
401
    private void testParsedName(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state, TaxonName name,
402
            String row, String fullCitation) throws UndefinedTransformerMethodException {
403
        Map<String, String> record = state.getOriginalRecord();
404

    
405
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
406
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
407
        String rankStr = getValue(record, TAXON_RANK);
408
        String genusHybrid = getValue(record, "genus_hybrid");
409
        String genus = getValue(record, "genus");
410
        String speciesHybrid = getValue(record, "species_hybrid");
411
        String species = getValue(record, "species");
412
        String infraSpecRank = getValue(record, "infraspecific_rank");
413
        String infraspecies = getValue(record, "infraspecies");
414
        String basionymAuthor = getValue(record, "parenthetical_author");
415
        String combinationAuthor = getValue(record, "primary_author");
416
        String authors = getValue(record, "taxon_authors");
417
        String year = getValue(record, "KewYear4CDM");
418
        String pubType = getValue(record, "PubType");
419
        String place_of_publication = getValue(record, "place_of_publication");
420
        String volume_and_page = getValue(record, "volume_and_page");
421

    
422
        if (!CdmUtils.nullSafeEqual(name.getNameCache(), nameCache)){
423
            logger.warn(row + "Unexpected nameCache: " + nameCache);
424
        }
425
        if (!CdmUtils.nullSafeEqual(name.getTitleCache(), fullName)){
426
            logger.warn(row + "Unexpected titleCache: <->" + name.getTitleCache());
427
        }
428
        if (isBlank(genusHybrid) == name.isMonomHybrid()){
429
            logger.warn(row + "Unexpected genus hybrid: " + genusHybrid);
430
        }
431
        if (!CdmUtils.nullSafeEqual(name.getGenusOrUninomial(),genus)){
432
            logger.warn(row + "Unexpected genus: " + genus);
433
        }if (isBlank(speciesHybrid) == name.isBinomHybrid()){
434
            logger.warn(row + "Unexpected species hybrid: " + speciesHybrid);
435
        }
436
        if (!CdmUtils.nullSafeEqual(name.getSpecificEpithet(),species)){
437
            logger.warn(row + "Unexpected species epithet: " + name.getSpecificEpithet() +"<->"+ species);
438
        }
439
        if (!CdmUtils.nullSafeEqual(name.getInfraSpecificEpithet(), infraspecies)){
440
            logger.warn(row + "Unexpected infraspecific epithet: " + name.getInfraSpecificEpithet() +"<->"+ infraspecies);
441
        }
442
        if (!CdmUtils.nullSafeEqual(name.getAuthorshipCache(),authors)){
443
            logger.warn(row + "Unexpected authors: " + name.getAuthorshipCache() +"<->"+ authors);
444
        }
445
        String combinationAndExAuthor = authorTitle(name.getCombinationAuthorship(), name.getExCombinationAuthorship());
446
        if (!CdmUtils.nullSafeEqual(combinationAndExAuthor, combinationAuthor)){
447
            logger.warn(row + "Unexpected combination author: " + combinationAndExAuthor +"<->"+ combinationAuthor);
448
        }
449
        String basionymAndExAuthor = authorTitle(name.getBasionymAuthorship(), name.getExBasionymAuthorship());
450
        if (!CdmUtils.nullSafeEqual(basionymAndExAuthor, basionymAuthor)){
451
            logger.warn(row + "Unexpected basionym author: " + basionymAndExAuthor +"<->"+ basionymAuthor);
452
        }
453
        Rank rank = state.getTransformer().getRankByKey(rankStr);
454
        if (!rank.equals(name.getRank())){
455
            logger.warn(row + "Unexpected rank: " + rankStr);
456
        }
457

    
458
        Reference nomRef = name.getNomenclaturalReference();
459
        if (nomRef == null){
460
            if (fullCitation != null){
461
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
462
                TaxonName parsedName = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
463
                if (parsedName.getNomenclaturalReference() != null){
464
                    name.setNomenclaturalReference(parsedName.getNomenclaturalReference());
465
                    logger.warn(row + "Nom.ref. was missing. Taken from Kew");
466
                }else{
467
                    logger.warn(row + "Nom. ref. is missing or can not be parsed");
468
                }
469
            }else{
470
                logger.warn(row + "NomRef is missing.");
471
            }
472
        }else{
473
            if ("A".equals(pubType) && nomRef.getType() != ReferenceType.Article){
474
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
475
            }
476
            if ("B".equals(pubType) && nomRef.getType() != ReferenceType.Book){
477
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
478
            }
479
            year = normalizeYear(year);
480
            if (!CdmUtils.nullSafeEqual(year, nomRef.getDatePublishedString())){
481
                logger.warn(row + "Unexpected year: " + year + "<->" + nomRef.getDatePublishedString());
482
            }
483
            if (volume_and_page != null && !name.getFullTitleCache().contains(volume_and_page)){
484
                logger.warn(row + "volume_and_page not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ volume_and_page);
485
            }
486
            if (place_of_publication != null && !name.getFullTitleCache().contains(place_of_publication)){
487
                logger.warn(row + "place_of_publication not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ place_of_publication);
488
            }
489
        }
490
        if ("subsp.".equals(infraSpecRank) && !rank.equals(Rank.SUBSPECIES())){
491
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
492
        }else if ("var.".equals(infraSpecRank) && !rank.equals(Rank.VARIETY())){
493
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
494
        }else if ("f.".equals(infraSpecRank) && !rank.equals(Rank.FORM())){
495
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
496
        }
497
    }
498

    
499
    private String authorTitle(TeamOrPersonBase<?> author, TeamOrPersonBase<?> exAuthor) {
500
        String authorStr = author == null? null: author.getNomenclaturalTitleCache();
501
        String exAuthorStr = exAuthor == null? null: exAuthor.getNomenclaturalTitleCache();
502
        return CdmUtils.concat(" ex ", exAuthorStr, authorStr);
503
    }
504

    
505
    private String normalizeYear(String year) {
506
        if (year == null){
507
            return null;
508
        }else if (year.contains("\" [")){
509
            String[] split = year.split("\" \\[");
510
            year = split[1].replace("]","") + " [" + split[0]+"\"]";
511
        }else if ("?".equals(year)){
512
            return null;
513
        }
514
        return year;
515
    }
516

    
517
    @Override
518
    protected void secondPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
519
        Map<String, String> record = state.getOriginalRecord();
520
        int line = state.getCurrentLine();
521
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
522
        String status = getValue(record, TAXON_STATUS);
523
        String sourceId = getValue(record, PLANT_NAME_ID);
524
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
525
        String accName = getValue(record, "AcceptedName");
526
        String basionymId = getValue(record, "basionym_plant_name_id");
527
        String homotypicSynonym = getValue(record, "homotypic_synonym");
528

    
529
        String row = String.valueOf(line) + "("+fullName+"): ";
530
        try {
531
            if ((line % 500) == 0){
532
                newTransaction(state);
533
                System.out.println(line);
534
            }
535

    
536
            if("Misapplied".equals(status)){
537
                return;
538
            }else if (neglectedRecords.contains(sourceId)){
539
                logger.info(row + "Record ignored.");
540
                return;
541
            }
542

    
543
            UUID uuid = taxonMapping.get(sourceId);
544
            TaxonBase<?> taxonBase = getTaxonService().find(uuid);
545
            if (taxonBase == null){
546
                logger.warn(row + "taxonBase not found: " + sourceId);
547
                return;
548
            }
549

    
550
            UUID accUuid = taxonMapping.get(accId);
551
            boolean hasAccepted = !sourceId.equals(accId);
552

    
553
            Taxon accTaxon = null;
554
            TaxonNode parent = null;
555
            Taxon child = null;
556
            Synonym syn = null;
557
            boolean isSynonymAccepted = false;
558

    
559
            if(accId == null){
560
                logger.info(row + "accID is null");
561
                child = CdmBase.deproxy(taxonBase, Taxon.class);
562
            }else if(hasAccepted){
563
                TaxonBase<?> accTaxonBase = getTaxonService().find(accUuid);
564
                if (accTaxonBase == null){
565
                    logger.warn(row + "acctaxon not found: " + accId + "; " + accName);
566
                }else if(!accTaxonBase.isInstanceOf(Taxon.class)){
567
                    logger.warn(row + "acctaxon is synonym: " + accId + "; " + accName);
568
                    isSynonymAccepted = true;
569
                }else{
570
                    accTaxon = CdmBase.deproxy(accTaxonBase, Taxon.class);
571
                    if (!accTaxon.getName().getTitleCache().equals(accName)){
572
                        logger.warn(row + "Accepted name differs: " + accName +" <-> "+ accTaxon.getName().getTitleCache());
573
                    }
574
                }
575
            }else if (sourceId.equals(accId)){
576
                if (!taxonBase.isInstanceOf(Taxon.class)){
577
                    logger.warn(row + "child not of class Taxon: " + sourceId);
578
                }else{
579
                    Rank rank = taxonBase.getName().getRank();
580
                    child = CdmBase.deproxy(taxonBase, Taxon.class);
581
                    if(rank.equals(Rank.GENUS())){
582
                        parent = getFamily();
583
                    }else if (rank.equals(Rank.SPECIES())){
584
                        String genus = child.getName().getGenusOrUninomial();
585
                        UUID parentUuid = taxonMapping.get(genus);
586
                        parent = getParent(parentUuid, row);
587
                    }else if (rank.isLowerThan(RankClass.Species)){
588
                        String speciesName = child.getName().getGenusOrUninomial() + " " + child.getName().getSpecificEpithet();
589
                        UUID parentUuid = taxonMapping.get(speciesName);
590
                        parent = getParent(parentUuid, row);
591
                    }
592
                }
593
            }
594

    
595
            if (taxonBase.isInstanceOf(Synonym.class)){
596
                syn = CdmBase.deproxy(taxonBase, Synonym.class);
597
            }
598

    
599
            if ("Accepted".equals(status)){
600
                if (parent == null){
601
                    logger.warn(row + "Parent is missing. Taxon is moved to 'unresolved' instead'");
602
                    parent = unresolvedParent();
603
                }
604
                if (child == null){
605
                    logger.warn(row + "Child is missing. Taxon not imported.");
606
                }else{
607
                    if (!child.getTaxonNodes().isEmpty()){
608
                        if(!child.getName().getRank().equals(Rank.GENUS())){
609
                            logger.warn(row + "Taxon already has a parent. Taxon not attached to any further parent taxon.");
610
                        }
611
                    }else{
612
                        addChild(parent, child, row);
613
                    }
614
                }
615
            }else if ("Synonym".equals(status)){
616
                if(accTaxon == null){
617
                    if(isSynonymAccepted){
618
                        logger.warn(row +  "Synonym added to 'unresolved' as accepted taxon is synonym itself.");
619
                    }else if (accId != null){
620
                        logger.warn(row +  "Accepted taxon for synonym unexpectedly does not exist (it seems not to be a synonym itself). Synonym moved to 'unresolved'");
621
                    }else{
622
                        logger.warn(row +  "No accepted taxon given for synonym. Therefore taxon moved to 'unresolved'");
623
                    }
624
                    if(accId != null){
625
                        child = Taxon.NewInstance(syn.getName(), syn.getSec());
626
                        child.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
627
                    }
628
                    addChild(unresolvedParent(), child, row);
629
                    getTaxonService().deleteSynonym(syn, new SynonymDeletionConfigurator());
630
                }else{
631
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF);
632
                }
633
            }else if ("Unplaced".equals(status)){
634
                parent = unresolvedParent();
635
                addChild(parent, child, row);
636
            }else if ("Artificial Hybrid".equals(status)){
637
                parent = hybridParent();
638
                addChild(parent, child, row);
639
            }else if ("Orthographic".equals(status)){
640
                if(accTaxon == null){
641
                    logger.warn(row + "'Orthographic' taxon has no acc taxon");
642
                }else{
643
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF);
644
                }
645
            }else if("Illegitimate".equals(status) || "Invalid".equals(status)){
646
                if (hasAccepted){
647
                    if(accTaxon == null){
648
                        logger.warn(row + "accepted taxon for illegitimate or invalid taxon not found");
649
                    }else{
650
                        accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF);
651
                    }
652
                }else{
653
                    addChild(unresolvedParent(), child, row);
654
                }
655
            }else{
656
                logger.warn(row + "Unhandled status: " +  status);
657
            }
658

    
659
            if (basionymId != null && false){
660
                UUID basionymUuid = taxonMapping.get(basionymId);
661
                TaxonBase<?> basionymTaxon = getTaxonService().find(basionymUuid);
662
                if (basionymTaxon != null){
663
                    if (hasSameAcceptedTaxon(taxonBase, basionymTaxon)){
664
                        if (taxonBase.getName().getBasionym() == null){
665
                            taxonBase.getName().addBasionym(basionymTaxon.getName());
666
                        }
667
                    }else{
668
                        logger.warn(row + "Basionym has not same accepted taxon and therefore was ignored.");
669
                    }
670
                }else{
671
                    logger.warn(row + "Basionym "+basionymId+" not found.");
672
                }
673
            }
674
        } catch (Exception e) {
675
            logger.error(row + "Error.");
676
            e.printStackTrace();
677
        }
678
    }
679

    
680
    private boolean hasSameAcceptedTaxon(TaxonBase<?> taxonBase, TaxonBase<?> basionymTaxon) {
681
        if (taxonBase.isInstanceOf(Synonym.class)){
682
            taxonBase = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
683
        }
684
        if (basionymTaxon.isInstanceOf(Synonym.class)){
685
            basionymTaxon = CdmBase.deproxy(basionymTaxon, Synonym.class).getAcceptedTaxon();
686
        }
687
        return taxonBase != null && basionymTaxon != null && taxonBase.equals(basionymTaxon);
688
    }
689

    
690
    private TaxonNode getParent(UUID parentUuid, String row) {
691
        if(parentUuid == null){
692
            logger.warn(row + "Parent uuid is null. No parent found.");
693
            return null;
694
        }
695
        TaxonBase<?> pTaxon = getTaxonService().find(parentUuid);
696
        if (pTaxon == null){
697
            logger.warn(row + "No parent found for parent UUID. This should not happen.");
698
            return null;
699
        }
700
        if (pTaxon.isInstanceOf(Synonym.class)){
701
            logger.warn(row + "Parent is synonym");
702
            return null;
703
        }else{
704
            Taxon ptax = CdmBase.deproxy(pTaxon, Taxon.class);
705
            if(ptax.getTaxonNodes().isEmpty()){
706
                logger.warn(row + "Parent has no node yet");
707
                return null;
708
            }else {
709
                if(ptax.getTaxonNodes().size()>1){
710
                    logger.warn("Parent has >1 nodes. Take arbitrary one");
711
                }
712
                return ptax.getTaxonNodes().iterator().next();
713
            }
714
        }
715
    }
716

    
717
    private void addChild(TaxonNode parent, Taxon child, String row) {
718
        if (parent == null){
719
            logger.warn(row + "Parent is null");
720
        }else if (child == null){
721
            logger.warn(row + "Child is null");
722
        }else{
723
            if (!child.getTaxonNodes().isEmpty()){
724
                TaxonNode childNode = child.getTaxonNodes().iterator().next();
725
                if (childNode.getParent() != null && childNode.getParent().equals(parent)){
726
                    logger.info(row + "Parent-child relation exists already.");
727
                }else{
728
                    logger.warn(row + "Child already has different parent. Parent-child relation not added.");
729
                }
730
            }else{
731
                TaxonNode node = parent.addChildTaxon(child, null, null);
732
                getTaxonNodeService().saveOrUpdate(node);
733
            }
734
        }
735
    }
736

    
737
    private TaxonNode getFamily(){
738
        UUID uuid = UUID.fromString("0334809a-aa20-447d-add9-138194f80f56");
739
        TaxonNode aizoaceae = getTaxonNodeService().find(uuid);
740
        return aizoaceae;
741
    }
742

    
743
    private TaxonNode hybridParent(){
744
        UUID uuid = UUID.fromString("2fae0fa1-758a-4fcb-bb6c-a2bd11f40641");
745
        TaxonNode hybridParent = getTaxonNodeService().find(uuid);
746
        return hybridParent;
747
    }
748
    private TaxonNode unresolvedParent(){
749
        UUID uuid = UUID.fromString("accb1ff6-5748-4b18-b529-9368c331a38d");
750
        TaxonNode unresolvedParent = getTaxonNodeService().find(uuid);
751
        return unresolvedParent;
752
    }
753
}
(1-1/18)