Project

General

Profile

Download (35.2 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.ArrayList;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.Iterator;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.api.service.config.SynonymDeletionConfigurator;
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
28
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
29
import eu.etaxonomy.cdm.model.common.CdmBase;
30
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
32
import eu.etaxonomy.cdm.model.name.Rank;
33
import eu.etaxonomy.cdm.model.name.TaxonName;
34
import eu.etaxonomy.cdm.model.reference.Reference;
35
import eu.etaxonomy.cdm.model.reference.ReferenceType;
36
import eu.etaxonomy.cdm.model.taxon.Synonym;
37
import eu.etaxonomy.cdm.model.taxon.SynonymType;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41
import eu.etaxonomy.cdm.model.term.DefinedTerm;
42
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
43

    
44
/**
45
 * @author a.mueller
46
 * @since 17.02.2020
47
 */
48
@Component
49
public class CaryoAizoaceaeExcelImport extends SimpleExcelTaxonImport<CaryoAizoaceaeExcelImportConfigurator>{
50

    
51
    private static final long serialVersionUID = -729761811965260921L;
52
    private static final Logger logger = Logger.getLogger(CaryoAizoaceaeExcelImport.class);
53

    
54
    private static final String ACCEPTED_PLANT_NAME_ID = "accepted_plant_name_id";
55
    private static final String NOMENCLATURAL_REMARKS = "nomenclatural_remarks";
56
    private static final String TAXON_RANK = "taxon_rank";
57
    private static final String NAME_CIT = "NameCit";
58
    private static final String KEW_NAME4CDM_LINK = "KewName4CDMLink";
59
    private static final String KEW_F_NAME4CDM_LINK = "KewFName4CDMLink";
60
    private static final String TAXON_STATUS = "taxon_status";
61
    private static final String PLANT_NAME_ID = "plant_name_id";
62
    private static final String IPNI_ID = "ipni_id";
63

    
64
    private Map<String, UUID> taxonMapping = new HashMap<>();
65
    private Reference secRef = null;
66
    private Set<String> neglectedRecords = new HashSet<>();
67
    private Set<UUID> createdNames = new HashSet<>();
68

    
69
    private SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state;
70

    
71
    @Override
72
    protected void firstPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
73
        int line = state.getCurrentLine();
74
        if ((line % 500) == 0){
75
            newTransaction(state);
76
            System.out.println(line);
77
        }
78

    
79
        this.state = state;
80
        Map<String, String> record = state.getOriginalRecord();
81

    
82
        String fullCitation = getValue(record, NAME_CIT);
83
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
84
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
85
        String status = getValue(record, TAXON_STATUS);
86
        String sourceId = getValue(record, PLANT_NAME_ID);
87
        String ipniId = getValue(record, IPNI_ID);
88
        String rankStr = getValue(record, TAXON_RANK);
89
        String nomenclaturalRemarks = getValue(record, NOMENCLATURAL_REMARKS);
90
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
91

    
92
        String row = String.valueOf(line) + "("+fullName+"): ";
93

    
94
        if("Misapplied".equals(status)){
95
            neglectedRecords.add(sourceId);
96
            return;
97
        }
98

    
99
        boolean isNewName = false;
100

    
101
        try {
102

    
103
            List<NomenclaturalStatusType> statusTypes = new ArrayList<>();
104
            Class<? extends CdmBase> clazz = makeStatus(status, sourceId, accId, row, statusTypes);
105

    
106
            TaxonName name;
107
            Rank rank = state.getTransformer().getRankByKey(rankStr);
108
            List<TaxonName> existingNames = getNameService().getNamesByNameCache(nameCache);
109
            Iterator<TaxonName> it = existingNames.iterator();
110
            while (it.hasNext()){
111
                TaxonName next = it.next();
112
                if (createdNames.contains(next.getUuid())){
113
                    it.remove();
114
                }
115
            }
116

    
117
            List<TaxonName> fullNameMatches = new ArrayList<>();
118

    
119
            @SuppressWarnings("rawtypes")
120
            List<TaxonBase> allFullNameTaxa = new ArrayList<>();
121
            @SuppressWarnings("rawtypes")
122
            List<TaxonBase> allNameCacheTaxa = new ArrayList<>();
123

    
124
            for (TaxonName existingName : existingNames){
125
                if (existingName.getTitleCache().equals(fullName)){
126
                    fullNameMatches.add(existingName);
127
                    allFullNameTaxa.addAll(existingName.getTaxonBases());
128
                }
129
                allNameCacheTaxa.addAll(existingName.getTaxonBases());
130
            }
131

    
132
            logMultipleCandidates(row, existingNames, fullNameMatches);
133

    
134
            TaxonBase<?> existingTaxon;
135
            if(allFullNameTaxa.size()>1){
136
                existingTaxon = findBestMatchingTaxon(allFullNameTaxa, clazz, row);
137
                name = existingTaxon.getName();
138
            }else if (allFullNameTaxa.size()==1){
139
                existingTaxon = allFullNameTaxa.iterator().next();
140
                name = existingTaxon.getName();
141
            }else{
142
                existingTaxon = null;
143
                if (!fullNameMatches.isEmpty()){
144
                    logger.warn(row + "None of the existing names exists as taxon/synonym. Existing name taken as base for new taxon/synonym created.");
145
                    if (fullNameMatches.size()>1){
146
                        logger.warn(row + "More than 1 matching full names exist as candidats for new taxon/synonym. Arbitrary one taken.");
147
                    }
148
                    name = fullNameMatches.iterator().next();
149
                }else if (!existingNames.isEmpty()){
150
                    if (!allNameCacheTaxa.isEmpty()){
151
                        logger.warn(row + "Taxa exist with matching nameCache but not matching fullname cache. New name and new taxon/synonym created. Other authors are " + getOtherAuthors(existingNames));
152
                        name = null;
153
                    }else{
154
                        logger.warn(row + "No matching fullnames exist but namecache matches. None of the matches is used in a taxon/synonym. Other authors are " + getOtherAuthors(existingNames));
155
                        name = null;
156
                    }
157
                }else{
158
                    name = null;
159
                }
160
            }
161

    
162
            if (existingTaxon == null){
163
                if (rank == null){
164
                    logger.warn(row + "Name has no rank " + nameCache);
165
                }else if (rank.equals(Rank.GENUS())){
166
                    logger.warn(row + "No name exists for genus " + nameCache + ". This is unexpected.");
167
                }
168
            }else{
169
                if (existingTaxon.isInstanceOf(Taxon.class)){
170
                    if (!CdmBase.deproxy(existingTaxon, Taxon.class).getTaxonNodes().isEmpty()){
171
                        neglectedRecords.add(sourceId);
172
                    }
173
                }else{
174
                    Taxon taxon = CdmBase.deproxy(existingTaxon, Synonym.class).getAcceptedTaxon();
175
                    if (taxon != null && !taxon.getTaxonNodes().isEmpty()){
176
                        neglectedRecords.add(sourceId);
177
                    }
178
                }
179
            }
180
            if (name == null){
181
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
182
                name = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
183
                if (name.isProtectedFullTitleCache() || name.isProtectedTitleCache() || name.isProtectedNameCache()
184
                        || name.isProtectedAuthorshipCache()){
185
                    logger.warn(row + "Name not parsable: " + fullCitation);
186
                    name.setTitleCache(fullName, true);
187
                    name.setNameCache(nameCache, true);
188
                }else{
189
                    testParsedName(state, name, row, null);
190
                }
191
                name.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
192
                name = dedupliateNameParts(name);
193
                getNameService().saveOrUpdate(name);
194
                isNewName = true;
195
                createdNames.add(name.getUuid());
196
            }else{
197
                testParsedName(state, name, row, fullCitation);
198
            }
199

    
200
            handleNomenclRemarkAndNameStatus(nomenclaturalRemarks, row, isNewName, name, statusTypes);
201

    
202
            TaxonBase<?> taxonBase = existingTaxon;
203

    
204
            if (taxonBase == null){
205
                if (clazz == Taxon.class){
206
                    taxonBase = Taxon.NewInstance(name, getSecRef());
207
                }else{
208
                    taxonBase = Synonym.NewInstance(name, getSecRef());
209
                }
210
                taxonBase.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
211
                getTaxonService().saveOrUpdate(taxonBase);
212
            }
213

    
214
            if (!isBlank(ipniId)){
215
                DefinedTerm ipniIdIdentifierType = DefinedTerm.IDENTIFIER_NAME_IPNI();
216
                name.addIdentifier(ipniId, ipniIdIdentifierType);
217
            }else{
218
                logger.warn(row + "IPNI id is missing.");
219
            }
220

    
221
            taxonMapping.put(sourceId, taxonBase.getUuid());
222
//            if("Accepted".equals(status)){
223
            if(taxonBase.isInstanceOf(Taxon.class)){
224
                    UUID existingUuid = taxonMapping.put(name.getNameCache(), taxonBase.getUuid());
225
                if (existingUuid != null){
226
                    logger.warn(row + name.getNameCache() + " has multiple instances in file");
227
                }
228
            }
229
        } catch (UndefinedTransformerMethodException e) {
230
            e.printStackTrace();
231
        }
232
    }
233

    
234
    private TaxonName dedupliateNameParts(TaxonName name) {
235
        if (state.getConfig().isDoDeduplicate()){
236
            state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
237
        }
238
        return name;
239
    }
240

    
241
    private String getOtherAuthors(List<TaxonName> otherNames) {
242
        String result = "";
243
        for (TaxonName name : otherNames){
244
            result = CdmUtils.concat(";", result, name.getAuthorshipCache());
245
        }
246
        return result;
247
    }
248

    
249
    private TaxonBase<?> findBestMatchingTaxon(@SuppressWarnings("rawtypes") List<TaxonBase> allFullNameTaxa,
250
            Class<? extends CdmBase> clazz, String row) {
251

    
252
        TaxonBase<?> result = null;
253
        TaxonBase<?> otherStatus = null;
254
        for (TaxonBase<?> taxonBase : allFullNameTaxa) {
255
            if (taxonBase.isInstanceOf(clazz)){
256
                if (result != null){
257
                    logger.warn(row + "More than 1 taxon with matching full name AND matching status exists. This is not further handled. Arbitrary one taken.");
258
                }
259
                result = taxonBase;
260
            }else{
261
                otherStatus = taxonBase;
262
            }
263
        }
264
        if (result == null && allFullNameTaxa.size()>1){
265
            logger.warn(row + "More than 1 taxon with matching fullname but NOT matching status exists. This is not further handled. Arbitrary one taken.");
266
        }
267
        return result == null? otherStatus :result ;
268
    }
269

    
270
    private void logMultipleCandidates(String row, List<TaxonName> existingNames, List<TaxonName> fullNameMatches) {
271
        if(fullNameMatches.size()>1){
272
            String message = row + "More than one name with matching full name exists in DB. Try to take best matching.";
273
            if (existingNames.size()>fullNameMatches.size()){
274
                message += " Additionally names with matching name cache exist.";
275
            }
276
            logger.warn(message);
277
        }else if (existingNames.size()>1){
278
            String message = row + "More than one name with matching nameCache exists in DB. ";
279
            if(fullNameMatches.isEmpty()){
280
                message += "But none matches full name.";
281
            }else{
282
                message += "But exactly 1 matches full name.";
283
            }
284
            logger.warn(message);
285
        }
286
    }
287

    
288
    private Class<? extends CdmBase> makeStatus(String status, String sourceId,
289
            String accId, String row, List<NomenclaturalStatusType> statusTypes) {
290

    
291
        Class<? extends CdmBase> clazz;
292
        if ("Accepted".equals(status) || "Unplaced".equals(status) || "Artificial Hybrid".equals(status) ){
293
            clazz = Taxon.class;
294
        }else if ("Synonym".equals(status) || "Orthographic".equals(status)){
295
            clazz = (accId == null)? Taxon.class : Synonym.class;
296
            if("Orthographic".equals(status)){
297
                statusTypes.add(NomenclaturalStatusType.SUPERFLUOUS());
298
//                addStatus(NomenclaturalStatusType.SUPERFLUOUS(), row, isNewName, statusAdded, statusTypes, null);
299
            }
300
        }else if("Illegitimate".equals(status)){
301
            clazz = getIllegInvalidStatus(sourceId, accId);
302
            statusTypes.add(NomenclaturalStatusType.ILLEGITIMATE());
303
//            addStatus(NomenclaturalStatusType.ILLEGITIMATE(), row, isNewName, statusAdded, statusTypes, getSecRef());
304
        }else if ("Invalid".equals(status)){
305
            clazz = getIllegInvalidStatus(sourceId, accId);
306
            statusTypes.add(NomenclaturalStatusType.INVALID());
307
//            addStatus(NomenclaturalStatusType.INVALID(), row, isNewName, statusAdded, statusTypes, getSecRef());
308
        }else{
309
            logger.warn(row + "Unhandled status: " + status);
310
            clazz = Taxon.class;  //to do something
311
        }
312
        return clazz;
313
    }
314

    
315
    private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name,
316
            List<NomenclaturalStatusType> statusTypes) {
317

    
318
        NomenclaturalStatusType remarkType = null;
319
        NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next();
320
        if (nomenclaturalRemarks == null){
321
           //nothing to do
322
        }else if (", nom. illeg.".equals(nomenclaturalRemarks)){
323
            remarkType = NomenclaturalStatusType.ILLEGITIMATE();
324
        }else if (", nom. cons.".equals(nomenclaturalRemarks)){
325
            remarkType = NomenclaturalStatusType.CONSERVED();
326
        }else if (", nom. nud.".equals(nomenclaturalRemarks)){
327
            remarkType = NomenclaturalStatusType.NUDUM();
328
        }else if (", nom. provis.".equals(nomenclaturalRemarks)){
329
            remarkType = NomenclaturalStatusType.PROVISIONAL();
330
        }else if (", nom. rej.".equals(nomenclaturalRemarks)){
331
            remarkType = NomenclaturalStatusType.REJECTED();
332
        }else if (", nom. subnud.".equals(nomenclaturalRemarks)){
333
            remarkType = NomenclaturalStatusType.SUBNUDUM();
334
        }else if (", nom. superfl.".equals(nomenclaturalRemarks)){
335
            remarkType = NomenclaturalStatusType.SUPERFLUOUS();
336
        }else if (", not validly publ.".equals(nomenclaturalRemarks)){
337
            statusTypes.add(NomenclaturalStatusType.INVALID());
338
        }else if (", opus utique oppr.".equals(nomenclaturalRemarks)){
339
            statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR());
340
        }else {
341
            logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks);
342
        }
343

    
344
        NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType;
345
        if (isNewName){
346
            if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
347
                logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken.");
348
            }
349
            if (kewType != null){
350
                name.addStatus(kewType, getSecRef(), null);
351
            }
352
        }else{
353
            NomenclaturalStatusType existingType = null;
354
            if (!name.getStatus().isEmpty()){
355
                existingType = name.getStatus().iterator().next().getType();
356
            }
357
            if (existingType != null && kewType != null){
358
                if (!existingType.equals(kewType)){
359
                    logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored");
360
                }
361
            }else if (existingType != null && kewType == null){
362
                logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept.");
363
            }else if (existingType == null && kewType != null){
364
                if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
365
                    logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field).");
366
                }else{
367
                    logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored.");
368
                }
369
            }
370
        }
371
    }
372

    
373
    private void newTransaction(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
374
        commitTransaction(state.getTransactionStatus());
375
        secRef = null;
376
        state.getDeduplicationHelper().reset();
377
        state.setSourceReference(null);
378
        System.gc();
379
        state.setTransactionStatus(startTransaction());
380
    }
381

    
382
    private Reference getSecRef() {
383
        if (secRef == null){
384
            secRef = getReferenceService().find(state.getConfig().getSecUuid());
385
        }
386
        return secRef;
387
    }
388

    
389
    private Class<? extends CdmBase> getIllegInvalidStatus(String sourceId, String accId) {
390
        if (sourceId.equals(accId)){
391
            return Taxon.class;
392
        }else if(accId != null){
393
            return Synonym.class;
394
        }
395
        return null;
396
    }
397

    
398

    
399
    private void testParsedName(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state, TaxonName name,
400
            String row, String fullCitation) throws UndefinedTransformerMethodException {
401
        Map<String, String> record = state.getOriginalRecord();
402

    
403
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
404
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
405
        String rankStr = getValue(record, TAXON_RANK);
406
        String genusHybrid = getValue(record, "genus_hybrid");
407
        String genus = getValue(record, "genus");
408
        String speciesHybrid = getValue(record, "species_hybrid");
409
        String species = getValue(record, "species");
410
        String infraSpecRank = getValue(record, "infraspecific_rank");
411
        String infraspecies = getValue(record, "infraspecies");
412
        String basionymAuthor = getValue(record, "parenthetical_author");
413
        String combinationAuthor = getValue(record, "primary_author");
414
        String authors = getValue(record, "taxon_authors");
415
        String year = getValue(record, "KewYear4CDM");
416
        String pubType = getValue(record, "PubType");
417
        String place_of_publication = getValue(record, "place_of_publication");
418
        String volume_and_page = getValue(record, "volume_and_page");
419

    
420
        if (!CdmUtils.nullSafeEqual(name.getNameCache(), nameCache)){
421
            logger.warn(row + "Unexpected nameCache: " + nameCache);
422
        }
423
        if (!CdmUtils.nullSafeEqual(name.getTitleCache(), fullName)){
424
            logger.warn(row + "Unexpected titleCache: <->" + name.getTitleCache());
425
        }
426
        if (isBlank(genusHybrid) == name.isMonomHybrid()){
427
            logger.warn(row + "Unexpected genus hybrid: " + genusHybrid);
428
        }
429
        if (!CdmUtils.nullSafeEqual(name.getGenusOrUninomial(),genus)){
430
            logger.warn(row + "Unexpected genus: " + genus);
431
        }if (isBlank(speciesHybrid) == name.isBinomHybrid()){
432
            logger.warn(row + "Unexpected species hybrid: " + speciesHybrid);
433
        }
434
        if (!CdmUtils.nullSafeEqual(name.getSpecificEpithet(),species)){
435
            logger.warn(row + "Unexpected species epithet: " + name.getSpecificEpithet() +"<->"+ species);
436
        }
437
        if (!CdmUtils.nullSafeEqual(name.getInfraSpecificEpithet(), infraspecies)){
438
            logger.warn(row + "Unexpected infraspecific epithet: " + name.getInfraSpecificEpithet() +"<->"+ infraspecies);
439
        }
440
        if (!CdmUtils.nullSafeEqual(name.getAuthorshipCache(),authors)){
441
            logger.warn(row + "Unexpected authors: " + name.getAuthorshipCache() +"<->"+ authors);
442
        }
443
        String combinationAndExAuthor = authorTitle(name.getCombinationAuthorship(), name.getExCombinationAuthorship());
444
        if (!CdmUtils.nullSafeEqual(combinationAndExAuthor, combinationAuthor)){
445
            logger.warn(row + "Unexpected combination author: " + combinationAndExAuthor +"<->"+ combinationAuthor);
446
        }
447
        String basionymAndExAuthor = authorTitle(name.getBasionymAuthorship(), name.getExBasionymAuthorship());
448
        if (!CdmUtils.nullSafeEqual(basionymAndExAuthor, basionymAuthor)){
449
            logger.warn(row + "Unexpected basionym author: " + basionymAndExAuthor +"<->"+ basionymAuthor);
450
        }
451
        Rank rank = state.getTransformer().getRankByKey(rankStr);
452
        if (!rank.equals(name.getRank())){
453
            logger.warn(row + "Unexpected rank: " + rankStr);
454
        }
455

    
456
        Reference nomRef = name.getNomenclaturalReference();
457
        if (nomRef == null){
458
            if (fullCitation != null){
459
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
460
                TaxonName parsedName = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
461
                if (parsedName.getNomenclaturalReference() != null){
462
                    name.setNomenclaturalReference(parsedName.getNomenclaturalReference());
463
                    logger.warn(row + "Nom.ref. was missing. Taken from Kew");
464
                }else{
465
                    logger.warn(row + "Nom. ref. is missing or can not be parsed");
466
                }
467
            }else{
468
                logger.warn(row + "NomRef is missing.");
469
            }
470
        }else{
471
            if ("A".equals(pubType) && nomRef.getType() != ReferenceType.Article){
472
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
473
            }
474
            if ("B".equals(pubType) && nomRef.getType() != ReferenceType.Book){
475
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
476
            }
477
            year = normalizeYear(year);
478
            if (!CdmUtils.nullSafeEqual(year, nomRef.getDatePublishedString())){
479
                logger.warn(row + "Unexpected year: " + year + "<->" + nomRef.getDatePublishedString());
480
            }
481
            if (volume_and_page != null && !name.getFullTitleCache().contains(volume_and_page)){
482
                logger.warn(row + "volume_and_page not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ volume_and_page);
483
            }
484
            if (place_of_publication != null && !name.getFullTitleCache().contains(place_of_publication)){
485
                logger.warn(row + "place_of_publication not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ place_of_publication);
486
            }
487
        }
488
        if ("subsp.".equals(infraSpecRank) && !rank.equals(Rank.SUBSPECIES())){
489
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
490
        }else if ("var.".equals(infraSpecRank) && !rank.equals(Rank.VARIETY())){
491
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
492
        }else if ("f.".equals(infraSpecRank) && !rank.equals(Rank.FORM())){
493
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
494
        }
495
    }
496

    
497
    private String authorTitle(TeamOrPersonBase<?> author, TeamOrPersonBase<?> exAuthor) {
498
        String authorStr = author == null? null: author.getNomenclaturalTitleCache();
499
        String exAuthorStr = exAuthor == null? null: exAuthor.getNomenclaturalTitleCache();
500
        return CdmUtils.concat(" ex ", exAuthorStr, authorStr);
501
    }
502

    
503
    private String normalizeYear(String year) {
504
        if (year == null){
505
            return null;
506
        }else if (year.contains("\" [")){
507
            String[] split = year.split("\" \\[");
508
            year = split[1].replace("]","") + " [" + split[0]+"\"]";
509
        }else if ("?".equals(year)){
510
            return null;
511
        }
512
        return year;
513
    }
514

    
515
    @Override
516
    protected void secondPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
517
        Map<String, String> record = state.getOriginalRecord();
518
        int line = state.getCurrentLine();
519
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
520
        String status = getValue(record, TAXON_STATUS);
521
        String sourceId = getValue(record, PLANT_NAME_ID);
522
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
523
        String accName = getValue(record, "AcceptedName");
524
        String basionymId = getValue(record, "basionym_plant_name_id");
525
        String homotypicSynonym = getValue(record, "homotypic_synonym");
526

    
527
        String row = String.valueOf(line) + "("+fullName+"): ";
528
        try {
529
            if ((line % 500) == 0){
530
                newTransaction(state);
531
                System.out.println(line);
532
            }
533

    
534
            if("Misapplied".equals(status)){
535
                return;
536
            }else if (neglectedRecords.contains(sourceId)){
537
                logger.info(row + "Record ignored.");
538
                return;
539
            }
540

    
541
            UUID uuid = taxonMapping.get(sourceId);
542
            TaxonBase<?> taxonBase = getTaxonService().find(uuid);
543
            if (taxonBase == null){
544
                logger.warn(row + "taxonBase not found: " + sourceId);
545
                return;
546
            }
547

    
548
            UUID accUuid = taxonMapping.get(accId);
549
            boolean hasAccepted = !sourceId.equals(accId);
550

    
551
            Taxon accTaxon = null;
552
            TaxonNode parent = null;
553
            Taxon child = null;
554
            Synonym syn = null;
555
            boolean isSynonymAccepted = false;
556

    
557
            if(accId == null){
558
                logger.info(row + "accID is null");
559
                child = CdmBase.deproxy(taxonBase, Taxon.class);
560
            }else if(hasAccepted){
561
                TaxonBase<?> accTaxonBase = getTaxonService().find(accUuid);
562
                if (accTaxonBase == null){
563
                    logger.warn(row + "acctaxon not found: " + accId + "; " + accName);
564
                }else if(!accTaxonBase.isInstanceOf(Taxon.class)){
565
                    logger.warn(row + "acctaxon is synonym: " + accId + "; " + accName);
566
                    isSynonymAccepted = true;
567
                }else{
568
                    accTaxon = CdmBase.deproxy(accTaxonBase, Taxon.class);
569
                    if (!accTaxon.getName().getTitleCache().equals(accName)){
570
                        logger.warn(row + "Accepted name differs: " + accName +" <-> "+ accTaxon.getName().getTitleCache());
571
                    }
572
                }
573
            }else if (sourceId.equals(accId)){
574
                if (!taxonBase.isInstanceOf(Taxon.class)){
575
                    logger.warn(row + "child not of class Taxon: " + sourceId);
576
                }else{
577
                    Rank rank = taxonBase.getName().getRank();
578
                    child = CdmBase.deproxy(taxonBase, Taxon.class);
579
                    if(rank.equals(Rank.GENUS())){
580
                        parent = getFamily();
581
                    }else if (rank.equals(Rank.SPECIES())){
582
                        String genus = child.getName().getGenusOrUninomial();
583
                        UUID parentUuid = taxonMapping.get(genus);
584
                        parent = getParent(parentUuid, row);
585
                    }else if (rank.isLower(Rank.SPECIES())){
586
                        String speciesName = child.getName().getGenusOrUninomial() + " " + child.getName().getSpecificEpithet();
587
                        UUID parentUuid = taxonMapping.get(speciesName);
588
                        parent = getParent(parentUuid, row);
589
                    }
590
                }
591
            }
592

    
593
            if (taxonBase.isInstanceOf(Synonym.class)){
594
                syn = CdmBase.deproxy(taxonBase, Synonym.class);
595
            }
596

    
597
            if ("Accepted".equals(status)){
598
                if (parent == null){
599
                    logger.warn(row + "Parent is missing. Taxon is moved to 'unresolved' instead'");
600
                    parent = unresolvedParent();
601
                }
602
                if (child == null){
603
                    logger.warn(row + "Child is missing. Taxon not imported.");
604
                }else{
605
                    if (!child.getTaxonNodes().isEmpty()){
606
                        if(!child.getName().getRank().equals(Rank.GENUS())){
607
                            logger.warn(row + "Taxon already has a parent. Taxon not attached to any further parent taxon.");
608
                        }
609
                    }else{
610
                        addChild(parent, child, row);
611
                    }
612
                }
613
            }else if ("Synonym".equals(status)){
614
                if(accTaxon == null){
615
                    if(isSynonymAccepted){
616
                        logger.warn(row +  "Synonym added to 'unresolved' as accepted taxon is synonym itself.");
617
                    }else if (accId != null){
618
                        logger.warn(row +  "Accepted taxon for synonym unexpectedly does not exist (it seems not to be a synonym itself). Synonym moved to 'unresolved'");
619
                    }else{
620
                        logger.warn(row +  "No accepted taxon given for synonym. Therefore taxon moved to 'unresolved'");
621
                    }
622
                    if(accId != null){
623
                        child = Taxon.NewInstance(syn.getName(), syn.getSec());
624
                        child.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
625
                    }
626
                    addChild(unresolvedParent(), child, row);
627
                    getTaxonService().deleteSynonym(syn, new SynonymDeletionConfigurator());
628
                }else{
629
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
630
                }
631
            }else if ("Unplaced".equals(status)){
632
                parent = unresolvedParent();
633
                addChild(parent, child, row);
634
            }else if ("Artificial Hybrid".equals(status)){
635
                parent = hybridParent();
636
                addChild(parent, child, row);
637
            }else if ("Orthographic".equals(status)){
638
                if(accTaxon == null){
639
                    logger.warn(row + "'Orthographic' taxon has no acc taxon");
640
                }else{
641
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
642
                }
643
            }else if("Illegitimate".equals(status) || "Invalid".equals(status)){
644
                if (hasAccepted){
645
                    if(accTaxon == null){
646
                        logger.warn(row + "accepted taxon for illegitimate or invalid taxon not found");
647
                    }else{
648
                        accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
649
                    }
650
                }else{
651
                    addChild(unresolvedParent(), child, row);
652
                }
653
            }else{
654
                logger.warn(row + "Unhandled status: " +  status);
655
            }
656

    
657
            if (basionymId != null && false){
658
                UUID basionymUuid = taxonMapping.get(basionymId);
659
                TaxonBase<?> basionymTaxon = getTaxonService().find(basionymUuid);
660
                if (basionymTaxon != null){
661
                    if (hasSameAcceptedTaxon(taxonBase, basionymTaxon)){
662
                        if (taxonBase.getName().getBasionym() == null){
663
                            taxonBase.getName().addBasionym(basionymTaxon.getName());
664
                        }
665
                    }else{
666
                        logger.warn(row + "Basionym has not same accepted taxon and therefore was ignored.");
667
                    }
668
                }else{
669
                    logger.warn(row + "Basionym "+basionymId+" not found.");
670
                }
671
            }
672
        } catch (Exception e) {
673
            logger.error(row + "Error.");
674
            e.printStackTrace();
675
        }
676
    }
677

    
678
    private boolean hasSameAcceptedTaxon(TaxonBase<?> taxonBase, TaxonBase<?> basionymTaxon) {
679
        if (taxonBase.isInstanceOf(Synonym.class)){
680
            taxonBase = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
681
        }
682
        if (basionymTaxon.isInstanceOf(Synonym.class)){
683
            basionymTaxon = CdmBase.deproxy(basionymTaxon, Synonym.class).getAcceptedTaxon();
684
        }
685
        return taxonBase != null && basionymTaxon != null && taxonBase.equals(basionymTaxon);
686
    }
687

    
688
    private TaxonNode getParent(UUID parentUuid, String row) {
689
        if(parentUuid == null){
690
            logger.warn(row + "Parent uuid is null. No parent found.");
691
            return null;
692
        }
693
        TaxonBase<?> pTaxon = getTaxonService().find(parentUuid);
694
        if (pTaxon == null){
695
            logger.warn(row + "No parent found for parent UUID. This should not happen.");
696
            return null;
697
        }
698
        if (pTaxon.isInstanceOf(Synonym.class)){
699
            logger.warn(row + "Parent is synonym");
700
            return null;
701
        }else{
702
            Taxon ptax = CdmBase.deproxy(pTaxon, Taxon.class);
703
            if(ptax.getTaxonNodes().isEmpty()){
704
                logger.warn(row + "Parent has no node yet");
705
                return null;
706
            }else {
707
                if(ptax.getTaxonNodes().size()>1){
708
                    logger.warn("Parent has >1 nodes. Take arbitrary one");
709
                }
710
                return ptax.getTaxonNodes().iterator().next();
711
            }
712
        }
713
    }
714

    
715
    private void addChild(TaxonNode parent, Taxon child, String row) {
716
        if (parent == null){
717
            logger.warn(row + "Parent is null");
718
        }else if (child == null){
719
            logger.warn(row + "Child is null");
720
        }else{
721
            if (!child.getTaxonNodes().isEmpty()){
722
                TaxonNode childNode = child.getTaxonNodes().iterator().next();
723
                if (childNode.getParent() != null && childNode.getParent().equals(parent)){
724
                    logger.info(row + "Parent-child relation exists already.");
725
                }else{
726
                    logger.warn(row + "Child already has different parent. Parent-child relation not added.");
727
                }
728
            }else{
729
                TaxonNode node = parent.addChildTaxon(child, null, null);
730
                getTaxonNodeService().saveOrUpdate(node);
731
            }
732
        }
733
    }
734

    
735
    private TaxonNode getFamily(){
736
        UUID uuid = UUID.fromString("0334809a-aa20-447d-add9-138194f80f56");
737
        TaxonNode aizoaceae = getTaxonNodeService().find(uuid);
738
        return aizoaceae;
739
    }
740

    
741
    private TaxonNode hybridParent(){
742
        UUID uuid = UUID.fromString("2fae0fa1-758a-4fcb-bb6c-a2bd11f40641");
743
        TaxonNode hybridParent = getTaxonNodeService().find(uuid);
744
        return hybridParent;
745
    }
746
    private TaxonNode unresolvedParent(){
747
        UUID uuid = UUID.fromString("accb1ff6-5748-4b18-b529-9368c331a38d");
748
        TaxonNode unresolvedParent = getTaxonNodeService().find(uuid);
749
        return unresolvedParent;
750
    }
751
}
(1-1/7)