Project

General

Profile

Download (35.6 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.ArrayList;
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.Iterator;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.api.service.config.SynonymDeletionConfigurator;
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
28
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
32
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
33
import eu.etaxonomy.cdm.model.name.Rank;
34
import eu.etaxonomy.cdm.model.name.TaxonName;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceType;
37
import eu.etaxonomy.cdm.model.taxon.Synonym;
38
import eu.etaxonomy.cdm.model.taxon.SynonymType;
39
import eu.etaxonomy.cdm.model.taxon.Taxon;
40
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
41
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
42
import eu.etaxonomy.cdm.model.term.DefinedTerm;
43
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
44

    
45
/**
46
 * @author a.mueller
47
 * @since 17.02.2020
48
 */
49
@Component
50
public class CaryoAizoaceaeExcelImport extends SimpleExcelTaxonImport<CaryoAizoaceaeExcelImportConfigurator>{
51

    
52
    private static final long serialVersionUID = -729761811965260921L;
53
    private static final Logger logger = Logger.getLogger(CaryoAizoaceaeExcelImport.class);
54

    
55
    private static final String ACCEPTED_PLANT_NAME_ID = "accepted_plant_name_id";
56
    private static final String NOMENCLATURAL_REMARKS = "nomenclatural_remarks";
57
    private static final String TAXON_RANK = "taxon_rank";
58
    private static final String NAME_CIT = "NameCit";
59
    private static final String KEW_NAME4CDM_LINK = "KewName4CDMLink";
60
    private static final String KEW_F_NAME4CDM_LINK = "KewFName4CDMLink";
61
    private static final String TAXON_STATUS = "taxon_status";
62
    private static final String PLANT_NAME_ID = "plant_name_id";
63
    private static final String IPNI_ID = "ipni_id";
64

    
65
    private Map<String, UUID> taxonMapping = new HashMap<>();
66
    private Reference secRef = null;
67
    private Set<String> neglectedRecords = new HashSet<>();
68
    private Set<UUID> createdNames = new HashSet<>();
69

    
70
    private SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state;
71
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> dedupHelper = null;
72

    
73
    @Override
74
    protected void firstPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
75
        int line = state.getCurrentLine();
76
        if ((line % 500) == 0){
77
            newTransaction(state);
78
            System.out.println(line);
79
        }
80

    
81
        this.state = state;
82
        Map<String, String> record = state.getOriginalRecord();
83

    
84
        String fullCitation = getValue(record, NAME_CIT);
85
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
86
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
87
        String status = getValue(record, TAXON_STATUS);
88
        String sourceId = getValue(record, PLANT_NAME_ID);
89
        String ipniId = getValue(record, IPNI_ID);
90
        String rankStr = getValue(record, TAXON_RANK);
91
        String nomenclaturalRemarks = getValue(record, NOMENCLATURAL_REMARKS);
92
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
93

    
94
        String row = String.valueOf(line) + "("+fullName+"): ";
95

    
96
        if("Misapplied".equals(status)){
97
            neglectedRecords.add(sourceId);
98
            return;
99
        }
100

    
101
        boolean isNewName = false;
102

    
103
        try {
104

    
105
            List<NomenclaturalStatusType> statusTypes = new ArrayList<>();
106
            Class<? extends CdmBase> clazz = makeStatus(status, sourceId, accId, row, statusTypes);
107

    
108
            TaxonName name;
109
            Rank rank = state.getTransformer().getRankByKey(rankStr);
110
            List<TaxonName> existingNames = getNameService().getNamesByNameCache(nameCache);
111
            Iterator<TaxonName> it = existingNames.iterator();
112
            while (it.hasNext()){
113
                TaxonName next = it.next();
114
                if (createdNames.contains(next.getUuid())){
115
                    it.remove();
116
                }
117
            }
118

    
119
            List<TaxonName> fullNameMatches = new ArrayList<>();
120

    
121
            @SuppressWarnings("rawtypes")
122
            List<TaxonBase> allFullNameTaxa = new ArrayList<>();
123
            @SuppressWarnings("rawtypes")
124
            List<TaxonBase> allNameCacheTaxa = new ArrayList<>();
125

    
126
            for (TaxonName existingName : existingNames){
127
                if (existingName.getTitleCache().equals(fullName)){
128
                    fullNameMatches.add(existingName);
129
                    allFullNameTaxa.addAll(existingName.getTaxonBases());
130
                }
131
                allNameCacheTaxa.addAll(existingName.getTaxonBases());
132
            }
133

    
134
            logMultipleCandidates(row, existingNames, fullNameMatches);
135

    
136
            TaxonBase<?> existingTaxon;
137
            if(allFullNameTaxa.size()>1){
138
                existingTaxon = findBestMatchingTaxon(allFullNameTaxa, clazz, row);
139
                name = existingTaxon.getName();
140
            }else if (allFullNameTaxa.size()==1){
141
                existingTaxon = allFullNameTaxa.iterator().next();
142
                name = existingTaxon.getName();
143
            }else{
144
                existingTaxon = null;
145
                if (!fullNameMatches.isEmpty()){
146
                    logger.warn(row + "None of the existing names exists as taxon/synonym. Existing name taken as base for new taxon/synonym created.");
147
                    if (fullNameMatches.size()>1){
148
                        logger.warn(row + "More than 1 matching full names exist as candidats for new taxon/synonym. Arbitrary one taken.");
149
                    }
150
                    name = fullNameMatches.iterator().next();
151
                }else if (!existingNames.isEmpty()){
152
                    if (!allNameCacheTaxa.isEmpty()){
153
                        logger.warn(row + "Taxa exist with matching nameCache but not matching fullname cache. New name and new taxon/synonym created. Other authors are " + getOtherAuthors(existingNames));
154
                        name = null;
155
                    }else{
156
                        logger.warn(row + "No matching fullnames exist but namecache matches. None of the matches is used in a taxon/synonym. Other authors are " + getOtherAuthors(existingNames));
157
                        name = null;
158
                    }
159
                }else{
160
                    name = null;
161
                }
162
            }
163

    
164
            if (existingTaxon == null){
165
                if (rank == null){
166
                    logger.warn(row + "Name has no rank " + nameCache);
167
                }else if (rank.equals(Rank.GENUS())){
168
                    logger.warn(row + "No name exists for genus " + nameCache + ". This is unexpected.");
169
                }
170
            }else{
171
                if (existingTaxon.isInstanceOf(Taxon.class)){
172
                    if (!CdmBase.deproxy(existingTaxon, Taxon.class).getTaxonNodes().isEmpty()){
173
                        neglectedRecords.add(sourceId);
174
                    }
175
                }else{
176
                    Taxon taxon = CdmBase.deproxy(existingTaxon, Synonym.class).getAcceptedTaxon();
177
                    if (taxon != null && !taxon.getTaxonNodes().isEmpty()){
178
                        neglectedRecords.add(sourceId);
179
                    }
180
                }
181
            }
182
            if (name == null){
183
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
184
                name = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
185
                if (name.isProtectedFullTitleCache() || name.isProtectedTitleCache() || name.isProtectedNameCache()
186
                        || name.isProtectedAuthorshipCache()){
187
                    logger.warn(row + "Name not parsable: " + fullCitation);
188
                    name.setTitleCache(fullName, true);
189
                    name.setNameCache(nameCache, true);
190
                }else{
191
                    testParsedName(state, name, row, null);
192
                }
193
                name.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
194
                name = dedupliateNameParts(name);
195
                getNameService().saveOrUpdate(name);
196
                isNewName = true;
197
                createdNames.add(name.getUuid());
198
            }else{
199
                testParsedName(state, name, row, fullCitation);
200
            }
201

    
202
            handleNomenclRemarkAndNameStatus(nomenclaturalRemarks, row, isNewName, name, statusTypes);
203

    
204
            TaxonBase<?> taxonBase = existingTaxon;
205

    
206
            if (taxonBase == null){
207
                if (clazz == Taxon.class){
208
                    taxonBase = Taxon.NewInstance(name, getSecRef());
209
                }else{
210
                    taxonBase = Synonym.NewInstance(name, getSecRef());
211
                }
212
                taxonBase.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
213
                getTaxonService().saveOrUpdate(taxonBase);
214
            }
215

    
216
            if (!isBlank(ipniId)){
217
                DefinedTerm ipniIdIdentifierType = DefinedTerm.IDENTIFIER_NAME_IPNI();
218
                name.addIdentifier(ipniId, ipniIdIdentifierType);
219
            }else{
220
                logger.warn(row + "IPNI id is missing.");
221
            }
222

    
223
            taxonMapping.put(sourceId, taxonBase.getUuid());
224
//            if("Accepted".equals(status)){
225
            if(taxonBase.isInstanceOf(Taxon.class)){
226
                    UUID existingUuid = taxonMapping.put(name.getNameCache(), taxonBase.getUuid());
227
                if (existingUuid != null){
228
                    logger.warn(row + name.getNameCache() + " has multiple instances in file");
229
                }
230
            }
231
        } catch (UndefinedTransformerMethodException e) {
232
            e.printStackTrace();
233
        }
234
    }
235

    
236
    private TaxonName dedupliateNameParts(TaxonName name) {
237
        if (state.getConfig().isDoDeduplicate()){
238
            getDedupHelper().replaceAuthorNamesAndNomRef(state, name);
239
        }
240
        return name;
241
    }
242

    
243
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> getDedupHelper() {
244
        if (dedupHelper == null){
245
            dedupHelper
246
                = ImportDeduplicationHelper.NewInstance(this, state);
247
        }
248
        return dedupHelper;
249
    }
250

    
251
    private String getOtherAuthors(List<TaxonName> otherNames) {
252
        String result = "";
253
        for (TaxonName name : otherNames){
254
            result = CdmUtils.concat(";", result, name.getAuthorshipCache());
255
        }
256
        return result;
257
    }
258

    
259
    private TaxonBase<?> findBestMatchingTaxon(@SuppressWarnings("rawtypes") List<TaxonBase> allFullNameTaxa,
260
            Class<? extends CdmBase> clazz, String row) {
261

    
262
        TaxonBase<?> result = null;
263
        TaxonBase<?> otherStatus = null;
264
        for (TaxonBase<?> taxonBase : allFullNameTaxa) {
265
            if (taxonBase.isInstanceOf(clazz)){
266
                if (result != null){
267
                    logger.warn(row + "More than 1 taxon with matching full name AND matching status exists. This is not further handled. Arbitrary one taken.");
268
                }
269
                result = taxonBase;
270
            }else{
271
                otherStatus = taxonBase;
272
            }
273
        }
274
        if (result == null && allFullNameTaxa.size()>1){
275
            logger.warn(row + "More than 1 taxon with matching fullname but NOT matching status exists. This is not further handled. Arbitrary one taken.");
276
        }
277
        return result == null? otherStatus :result ;
278
    }
279

    
280
    private void logMultipleCandidates(String row, List<TaxonName> existingNames, List<TaxonName> fullNameMatches) {
281
        if(fullNameMatches.size()>1){
282
            String message = row + "More than one name with matching full name exists in DB. Try to take best matching.";
283
            if (existingNames.size()>fullNameMatches.size()){
284
                message += " Additionally names with matching name cache exist.";
285
            }
286
            logger.warn(message);
287
        }else if (existingNames.size()>1){
288
            String message = row + "More than one name with matching nameCache exists in DB. ";
289
            if(fullNameMatches.isEmpty()){
290
                message += "But none matches full name.";
291
            }else{
292
                message += "But exactly 1 matches full name.";
293
            }
294
            logger.warn(message);
295
        }
296
    }
297

    
298
    private Class<? extends CdmBase> makeStatus(String status, String sourceId,
299
            String accId, String row, List<NomenclaturalStatusType> statusTypes) {
300

    
301
        Class<? extends CdmBase> clazz;
302
        if ("Accepted".equals(status) || "Unplaced".equals(status) || "Artificial Hybrid".equals(status) ){
303
            clazz = Taxon.class;
304
        }else if ("Synonym".equals(status) || "Orthographic".equals(status)){
305
            clazz = (accId == null)? Taxon.class : Synonym.class;
306
            if("Orthographic".equals(status)){
307
                statusTypes.add(NomenclaturalStatusType.SUPERFLUOUS());
308
//                addStatus(NomenclaturalStatusType.SUPERFLUOUS(), row, isNewName, statusAdded, statusTypes, null);
309
            }
310
        }else if("Illegitimate".equals(status)){
311
            clazz = getIllegInvalidStatus(sourceId, accId);
312
            statusTypes.add(NomenclaturalStatusType.ILLEGITIMATE());
313
//            addStatus(NomenclaturalStatusType.ILLEGITIMATE(), row, isNewName, statusAdded, statusTypes, getSecRef());
314
        }else if ("Invalid".equals(status)){
315
            clazz = getIllegInvalidStatus(sourceId, accId);
316
            statusTypes.add(NomenclaturalStatusType.INVALID());
317
//            addStatus(NomenclaturalStatusType.INVALID(), row, isNewName, statusAdded, statusTypes, getSecRef());
318
        }else{
319
            logger.warn(row + "Unhandled status: " + status);
320
            clazz = Taxon.class;  //to do something
321
        }
322
        return clazz;
323
    }
324

    
325
    private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name,
326
            List<NomenclaturalStatusType> statusTypes) {
327

    
328
        NomenclaturalStatusType remarkType = null;
329
        NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next();
330
        if (nomenclaturalRemarks == null){
331
           //nothing to do
332
        }else if (", nom. illeg.".equals(nomenclaturalRemarks)){
333
            remarkType = NomenclaturalStatusType.ILLEGITIMATE();
334
        }else if (", nom. cons.".equals(nomenclaturalRemarks)){
335
            remarkType = NomenclaturalStatusType.CONSERVED();
336
        }else if (", nom. nud.".equals(nomenclaturalRemarks)){
337
            remarkType = NomenclaturalStatusType.NUDUM();
338
        }else if (", nom. provis.".equals(nomenclaturalRemarks)){
339
            remarkType = NomenclaturalStatusType.PROVISIONAL();
340
        }else if (", nom. rej.".equals(nomenclaturalRemarks)){
341
            remarkType = NomenclaturalStatusType.REJECTED();
342
        }else if (", nom. subnud.".equals(nomenclaturalRemarks)){
343
            remarkType = NomenclaturalStatusType.SUBNUDUM();
344
        }else if (", nom. superfl.".equals(nomenclaturalRemarks)){
345
            remarkType = NomenclaturalStatusType.SUPERFLUOUS();
346
        }else if (", not validly publ.".equals(nomenclaturalRemarks)){
347
            statusTypes.add(NomenclaturalStatusType.INVALID());
348
        }else if (", opus utique oppr.".equals(nomenclaturalRemarks)){
349
            statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR());
350
        }else {
351
            logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks);
352
        }
353

    
354
        NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType;
355
        if (isNewName){
356
            if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
357
                logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken.");
358
            }
359
            if (kewType != null){
360
                name.addStatus(kewType, getSecRef(), null);
361
            }
362
        }else{
363
            NomenclaturalStatusType existingType = null;
364
            if (!name.getStatus().isEmpty()){
365
                existingType = name.getStatus().iterator().next().getType();
366
            }
367
            if (existingType != null && kewType != null){
368
                if (!existingType.equals(kewType)){
369
                    logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored");
370
                }
371
            }else if (existingType != null && kewType == null){
372
                logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept.");
373
            }else if (existingType == null && kewType != null){
374
                if(remarkType != null && statusType != null && !remarkType.equals(statusType)){
375
                    logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field).");
376
                }else{
377
                    logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored.");
378
                }
379
            }
380
        }
381
    }
382

    
383
    private void newTransaction(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
384
        commitTransaction(state.getTransactionStatus());
385
        secRef = null;
386
        dedupHelper = null;
387
        state.setSourceReference(null);
388
        System.gc();
389
        state.setTransactionStatus(startTransaction());
390
    }
391

    
392
    private Reference getSecRef() {
393
        if (secRef == null){
394
            secRef = getReferenceService().find(state.getConfig().getSecUuid());
395
        }
396
        return secRef;
397
    }
398

    
399
    private Class<? extends CdmBase> getIllegInvalidStatus(String sourceId, String accId) {
400
        if (sourceId.equals(accId)){
401
            return Taxon.class;
402
        }else if(accId != null){
403
            return Synonym.class;
404
        }
405
        return null;
406
    }
407

    
408

    
409
    private void testParsedName(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state, TaxonName name,
410
            String row, String fullCitation) throws UndefinedTransformerMethodException {
411
        Map<String, String> record = state.getOriginalRecord();
412

    
413
        String nameCache = getValue(record, KEW_NAME4CDM_LINK);
414
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
415
        String rankStr = getValue(record, TAXON_RANK);
416
        String genusHybrid = getValue(record, "genus_hybrid");
417
        String genus = getValue(record, "genus");
418
        String speciesHybrid = getValue(record, "species_hybrid");
419
        String species = getValue(record, "species");
420
        String infraSpecRank = getValue(record, "infraspecific_rank");
421
        String infraspecies = getValue(record, "infraspecies");
422
        String basionymAuthor = getValue(record, "parenthetical_author");
423
        String combinationAuthor = getValue(record, "primary_author");
424
        String authors = getValue(record, "taxon_authors");
425
        String year = getValue(record, "KewYear4CDM");
426
        String pubType = getValue(record, "PubType");
427
        String place_of_publication = getValue(record, "place_of_publication");
428
        String volume_and_page = getValue(record, "volume_and_page");
429

    
430
        if (!CdmUtils.nullSafeEqual(name.getNameCache(), nameCache)){
431
            logger.warn(row + "Unexpected nameCache: " + nameCache);
432
        }
433
        if (!CdmUtils.nullSafeEqual(name.getTitleCache(), fullName)){
434
            logger.warn(row + "Unexpected titleCache: <->" + name.getTitleCache());
435
        }
436
        if (isBlank(genusHybrid) == name.isMonomHybrid()){
437
            logger.warn(row + "Unexpected genus hybrid: " + genusHybrid);
438
        }
439
        if (!CdmUtils.nullSafeEqual(name.getGenusOrUninomial(),genus)){
440
            logger.warn(row + "Unexpected genus: " + genus);
441
        }if (isBlank(speciesHybrid) == name.isBinomHybrid()){
442
            logger.warn(row + "Unexpected species hybrid: " + speciesHybrid);
443
        }
444
        if (!CdmUtils.nullSafeEqual(name.getSpecificEpithet(),species)){
445
            logger.warn(row + "Unexpected species epithet: " + name.getSpecificEpithet() +"<->"+ species);
446
        }
447
        if (!CdmUtils.nullSafeEqual(name.getInfraSpecificEpithet(), infraspecies)){
448
            logger.warn(row + "Unexpected infraspecific epithet: " + name.getInfraSpecificEpithet() +"<->"+ infraspecies);
449
        }
450
        if (!CdmUtils.nullSafeEqual(name.getAuthorshipCache(),authors)){
451
            logger.warn(row + "Unexpected authors: " + name.getAuthorshipCache() +"<->"+ authors);
452
        }
453
        String combinationAndExAuthor = authorTitle(name.getCombinationAuthorship(), name.getExCombinationAuthorship());
454
        if (!CdmUtils.nullSafeEqual(combinationAndExAuthor, combinationAuthor)){
455
            logger.warn(row + "Unexpected combination author: " + combinationAndExAuthor +"<->"+ combinationAuthor);
456
        }
457
        String basionymAndExAuthor = authorTitle(name.getBasionymAuthorship(), name.getExBasionymAuthorship());
458
        if (!CdmUtils.nullSafeEqual(basionymAndExAuthor, basionymAuthor)){
459
            logger.warn(row + "Unexpected basionym author: " + basionymAndExAuthor +"<->"+ basionymAuthor);
460
        }
461
        Rank rank = state.getTransformer().getRankByKey(rankStr);
462
        if (!rank.equals(name.getRank())){
463
            logger.warn(row + "Unexpected rank: " + rankStr);
464
        }
465

    
466
        Reference nomRef = name.getNomenclaturalReference();
467
        if (nomRef == null){
468
            if (fullCitation != null){
469
                NonViralNameParserImpl parser = new NonViralNameParserImpl();
470
                TaxonName parsedName = parser.parseReferencedName(fullCitation, NomenclaturalCode.ICNAFP, rank);
471
                if (parsedName.getNomenclaturalReference() != null){
472
                    name.setNomenclaturalReference(parsedName.getNomenclaturalReference());
473
                    logger.warn(row + "Nom.ref. was missing. Taken from Kew");
474
                }else{
475
                    logger.warn(row + "Nom. ref. is missing or can not be parsed");
476
                }
477
            }else{
478
                logger.warn(row + "NomRef is missing.");
479
            }
480
        }else{
481
            if ("A".equals(pubType) && nomRef.getType() != ReferenceType.Article){
482
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
483
            }
484
            if ("B".equals(pubType) && nomRef.getType() != ReferenceType.Book){
485
                logger.warn(row + "Unexpected nomref type: " + pubType + "<->" + nomRef.getType().toString());
486
            }
487
            year = normalizeYear(year);
488
            if (!CdmUtils.nullSafeEqual(year, nomRef.getDatePublishedString())){
489
                logger.warn(row + "Unexpected year: " + year + "<->" + nomRef.getDatePublishedString());
490
            }
491
            if (volume_and_page != null && !name.getFullTitleCache().contains(volume_and_page)){
492
                logger.warn(row + "volume_and_page not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ volume_and_page);
493
            }
494
            if (place_of_publication != null && !name.getFullTitleCache().contains(place_of_publication)){
495
                logger.warn(row + "place_of_publication not found in fullTitleCache: " + name.getFullTitleCache() +"<->"+ place_of_publication);
496
            }
497
        }
498
        if ("subsp.".equals(infraSpecRank) && !rank.equals(Rank.SUBSPECIES())){
499
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
500
        }else if ("var.".equals(infraSpecRank) && !rank.equals(Rank.VARIETY())){
501
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
502
        }else if ("f.".equals(infraSpecRank) && !rank.equals(Rank.FORM())){
503
            logger.warn(row + "Unexpected infraspec marker: " + infraSpecRank);
504
        }
505
    }
506

    
507
    private String authorTitle(TeamOrPersonBase<?> author, TeamOrPersonBase<?> exAuthor) {
508
        String authorStr = author == null? null: author.getNomenclaturalTitleCache();
509
        String exAuthorStr = exAuthor == null? null: exAuthor.getNomenclaturalTitleCache();
510
        return CdmUtils.concat(" ex ", exAuthorStr, authorStr);
511
    }
512

    
513
    private String normalizeYear(String year) {
514
        if (year == null){
515
            return null;
516
        }else if (year.contains("\" [")){
517
            String[] split = year.split("\" \\[");
518
            year = split[1].replace("]","") + " [" + split[0]+"\"]";
519
        }else if ("?".equals(year)){
520
            return null;
521
        }
522
        return year;
523
    }
524

    
525
    @Override
526
    protected void secondPass(SimpleExcelTaxonImportState<CaryoAizoaceaeExcelImportConfigurator> state) {
527
        Map<String, String> record = state.getOriginalRecord();
528
        int line = state.getCurrentLine();
529
        String fullName = getValue(record, KEW_F_NAME4CDM_LINK);
530
        String status = getValue(record, TAXON_STATUS);
531
        String sourceId = getValue(record, PLANT_NAME_ID);
532
        String accId = getValue(record, ACCEPTED_PLANT_NAME_ID);
533
        String accName = getValue(record, "AcceptedName");
534
        String basionymId = getValue(record, "basionym_plant_name_id");
535
        String homotypicSynonym = getValue(record, "homotypic_synonym");
536

    
537
        String row = String.valueOf(line) + "("+fullName+"): ";
538
        try {
539
            if ((line % 500) == 0){
540
                newTransaction(state);
541
                System.out.println(line);
542
            }
543

    
544
            if("Misapplied".equals(status)){
545
                return;
546
            }else if (neglectedRecords.contains(sourceId)){
547
                logger.info(row + "Record ignored.");
548
                return;
549
            }
550

    
551
            UUID uuid = taxonMapping.get(sourceId);
552
            TaxonBase<?> taxonBase = getTaxonService().find(uuid);
553
            if (taxonBase == null){
554
                logger.warn(row + "taxonBase not found: " + sourceId);
555
                return;
556
            }
557

    
558
            UUID accUuid = taxonMapping.get(accId);
559
            boolean hasAccepted = !sourceId.equals(accId);
560

    
561
            Taxon accTaxon = null;
562
            TaxonNode parent = null;
563
            Taxon child = null;
564
            Synonym syn = null;
565
            boolean isSynonymAccepted = false;
566

    
567
            if(accId == null){
568
                logger.info(row + "accID is null");
569
                child = CdmBase.deproxy(taxonBase, Taxon.class);
570
            }else if(hasAccepted){
571
                TaxonBase<?> accTaxonBase = getTaxonService().find(accUuid);
572
                if (accTaxonBase == null){
573
                    logger.warn(row + "acctaxon not found: " + accId + "; " + accName);
574
                }else if(!accTaxonBase.isInstanceOf(Taxon.class)){
575
                    logger.warn(row + "acctaxon is synonym: " + accId + "; " + accName);
576
                    isSynonymAccepted = true;
577
                }else{
578
                    accTaxon = CdmBase.deproxy(accTaxonBase, Taxon.class);
579
                    if (!accTaxon.getName().getTitleCache().equals(accName)){
580
                        logger.warn(row + "Accepted name differs: " + accName +" <-> "+ accTaxon.getName().getTitleCache());
581
                    }
582
                }
583
            }else if (sourceId.equals(accId)){
584
                if (!taxonBase.isInstanceOf(Taxon.class)){
585
                    logger.warn(row + "child not of class Taxon: " + sourceId);
586
                }else{
587
                    Rank rank = taxonBase.getName().getRank();
588
                    child = CdmBase.deproxy(taxonBase, Taxon.class);
589
                    if(rank.equals(Rank.GENUS())){
590
                        parent = getFamily();
591
                    }else if (rank.equals(Rank.SPECIES())){
592
                        String genus = child.getName().getGenusOrUninomial();
593
                        UUID parentUuid = taxonMapping.get(genus);
594
                        parent = getParent(parentUuid, row);
595
                    }else if (rank.isLower(Rank.SPECIES())){
596
                        String speciesName = child.getName().getGenusOrUninomial() + " " + child.getName().getSpecificEpithet();
597
                        UUID parentUuid = taxonMapping.get(speciesName);
598
                        parent = getParent(parentUuid, row);
599
                    }
600
                }
601
            }
602

    
603
            if (taxonBase.isInstanceOf(Synonym.class)){
604
                syn = CdmBase.deproxy(taxonBase, Synonym.class);
605
            }
606

    
607
            if ("Accepted".equals(status)){
608
                if (parent == null){
609
                    logger.warn(row + "Parent is missing. Taxon is moved to 'unresolved' instead'");
610
                    parent = unresolvedParent();
611
                }
612
                if (child == null){
613
                    logger.warn(row + "Child is missing. Taxon not imported.");
614
                }else{
615
                    if (!child.getTaxonNodes().isEmpty()){
616
                        if(!child.getName().getRank().equals(Rank.GENUS())){
617
                            logger.warn(row + "Taxon already has a parent. Taxon not attached to any further parent taxon.");
618
                        }
619
                    }else{
620
                        addChild(parent, child, row);
621
                    }
622
                }
623
            }else if ("Synonym".equals(status)){
624
                if(accTaxon == null){
625
                    if(isSynonymAccepted){
626
                        logger.warn(row +  "Synonym added to 'unresolved' as accepted taxon is synonym itself.");
627
                    }else if (accId != null){
628
                        logger.warn(row +  "Accepted taxon for synonym unexpectedly does not exist (it seems not to be a synonym itself). Synonym moved to 'unresolved'");
629
                    }else{
630
                        logger.warn(row +  "No accepted taxon given for synonym. Therefore taxon moved to 'unresolved'");
631
                    }
632
                    if(accId != null){
633
                        child = Taxon.NewInstance(syn.getName(), syn.getSec());
634
                        child.addImportSource(sourceId, PLANT_NAME_ID, getSourceReference(state), "line " + state.getCurrentLine());
635
                    }
636
                    addChild(unresolvedParent(), child, row);
637
                    getTaxonService().deleteSynonym(syn, new SynonymDeletionConfigurator());
638
                }else{
639
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
640
                }
641
            }else if ("Unplaced".equals(status)){
642
                parent = unresolvedParent();
643
                addChild(parent, child, row);
644
            }else if ("Artificial Hybrid".equals(status)){
645
                parent = hybridParent();
646
                addChild(parent, child, row);
647
            }else if ("Orthographic".equals(status)){
648
                if(accTaxon == null){
649
                    logger.warn(row + "'Orthographic' taxon has no acc taxon");
650
                }else{
651
                    accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
652
                }
653
            }else if("Illegitimate".equals(status) || "Invalid".equals(status)){
654
                if (hasAccepted){
655
                    if(accTaxon == null){
656
                        logger.warn(row + "accepted taxon for illegitimate or invalid taxon not found");
657
                    }else{
658
                        accTaxon.addSynonym(syn, SynonymType.SYNONYM_OF());
659
                    }
660
                }else{
661
                    addChild(unresolvedParent(), child, row);
662
                }
663
            }else{
664
                logger.warn(row + "Unhandled status: " +  status);
665
            }
666

    
667
            if (basionymId != null && false){
668
                UUID basionymUuid = taxonMapping.get(basionymId);
669
                TaxonBase<?> basionymTaxon = getTaxonService().find(basionymUuid);
670
                if (basionymTaxon != null){
671
                    if (hasSameAcceptedTaxon(taxonBase, basionymTaxon)){
672
                        if (taxonBase.getName().getBasionym() == null){
673
                            taxonBase.getName().addBasionym(basionymTaxon.getName());
674
                        }
675
                    }else{
676
                        logger.warn(row + "Basionym has not same accepted taxon and therefore was ignored.");
677
                    }
678
                }else{
679
                    logger.warn(row + "Basionym "+basionymId+" not found.");
680
                }
681
            }
682
        } catch (Exception e) {
683
            logger.error(row + "Error.");
684
            e.printStackTrace();
685
        }
686
    }
687

    
688
    private boolean hasSameAcceptedTaxon(TaxonBase<?> taxonBase, TaxonBase<?> basionymTaxon) {
689
        if (taxonBase.isInstanceOf(Synonym.class)){
690
            taxonBase = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
691
        }
692
        if (basionymTaxon.isInstanceOf(Synonym.class)){
693
            basionymTaxon = CdmBase.deproxy(basionymTaxon, Synonym.class).getAcceptedTaxon();
694
        }
695
        return taxonBase != null && basionymTaxon != null && taxonBase.equals(basionymTaxon);
696
    }
697

    
698
    private TaxonNode getParent(UUID parentUuid, String row) {
699
        if(parentUuid == null){
700
            logger.warn(row + "Parent uuid is null. No parent found.");
701
            return null;
702
        }
703
        TaxonBase<?> pTaxon = getTaxonService().find(parentUuid);
704
        if (pTaxon == null){
705
            logger.warn(row + "No parent found for parent UUID. This should not happen.");
706
            return null;
707
        }
708
        if (pTaxon.isInstanceOf(Synonym.class)){
709
            logger.warn(row + "Parent is synonym");
710
            return null;
711
        }else{
712
            Taxon ptax = CdmBase.deproxy(pTaxon, Taxon.class);
713
            if(ptax.getTaxonNodes().isEmpty()){
714
                logger.warn(row + "Parent has no node yet");
715
                return null;
716
            }else {
717
                if(ptax.getTaxonNodes().size()>1){
718
                    logger.warn("Parent has >1 nodes. Take arbitrary one");
719
                }
720
                return ptax.getTaxonNodes().iterator().next();
721
            }
722
        }
723
    }
724

    
725
    private void addChild(TaxonNode parent, Taxon child, String row) {
726
        if (parent == null){
727
            logger.warn(row + "Parent is null");
728
        }else if (child == null){
729
            logger.warn(row + "Child is null");
730
        }else{
731
            if (!child.getTaxonNodes().isEmpty()){
732
                TaxonNode childNode = child.getTaxonNodes().iterator().next();
733
                if (childNode.getParent() != null && childNode.getParent().equals(parent)){
734
                    logger.info(row + "Parent-child relation exists already.");
735
                }else{
736
                    logger.warn(row + "Child already has different parent. Parent-child relation not added.");
737
                }
738
            }else{
739
                TaxonNode node = parent.addChildTaxon(child, null, null);
740
                getTaxonNodeService().saveOrUpdate(node);
741
            }
742
        }
743
    }
744

    
745
    private TaxonNode getFamily(){
746
        UUID uuid = UUID.fromString("0334809a-aa20-447d-add9-138194f80f56");
747
        TaxonNode aizoaceae = getTaxonNodeService().find(uuid);
748
        return aizoaceae;
749
    }
750

    
751
    private TaxonNode hybridParent(){
752
        UUID uuid = UUID.fromString("2fae0fa1-758a-4fcb-bb6c-a2bd11f40641");
753
        TaxonNode hybridParent = getTaxonNodeService().find(uuid);
754
        return hybridParent;
755
    }
756
    private TaxonNode unresolvedParent(){
757
        UUID uuid = UUID.fromString("accb1ff6-5748-4b18-b529-9368c331a38d");
758
        TaxonNode unresolvedParent = getTaxonNodeService().find(uuid);
759
        return unresolvedParent;
760
    }
761
}
(1-1/7)