Project

General

Profile

Download (29.5 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17
import java.util.regex.Matcher;
18
import java.util.regex.Pattern;
19

    
20
import org.apache.commons.lang3.StringUtils;
21
import org.apache.logging.log4j.LogManager;
22
import org.apache.logging.log4j.Logger;
23
import org.springframework.stereotype.Component;
24
import org.springframework.transaction.TransactionStatus;
25

    
26
import eu.etaxonomy.cdm.common.CdmUtils;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
28
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
29
import eu.etaxonomy.cdm.model.agent.Person;
30
import eu.etaxonomy.cdm.model.agent.Team;
31
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32
import eu.etaxonomy.cdm.model.common.CdmBase;
33
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
34
import eu.etaxonomy.cdm.model.name.INonViralName;
35
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
38
import eu.etaxonomy.cdm.model.name.Rank;
39
import eu.etaxonomy.cdm.model.name.TaxonName;
40
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
41
import eu.etaxonomy.cdm.model.reference.Reference;
42
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
43
import eu.etaxonomy.cdm.model.reference.ReferenceType;
44
import eu.etaxonomy.cdm.model.taxon.Classification;
45
import eu.etaxonomy.cdm.model.taxon.Synonym;
46
import eu.etaxonomy.cdm.model.taxon.SynonymType;
47
import eu.etaxonomy.cdm.model.taxon.Taxon;
48
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
49
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
50
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
51

    
52
/**
53
 * Kew excel taxon import for Caryophyllaceae.
54
 *
55
 * @author a.mueller
56
 * @since 05.01.2022
57
 */
58
@Component
59
public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
60
            extends SimpleExcelTaxonImport<CONFIG>{
61

    
62
    private static final long serialVersionUID = 1081966876789613803L;
63
    private static final Logger logger = LogManager.getLogger();
64

    
65
    private static final String NO_SIMPLE_DIFF = "xxxxx";
66

    
67
    private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
68
    private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
69
    private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
70

    
71
    private static final String CDM_Name_UUID = "CDM-Name_UUID";
72
    private static final String Kew_Name_ID = "Kew-Name-ID";
73
    private static final String Kew_Name_Citation = "Kew-Name-Citation";
74
    private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
75
    private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
76
    private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
77
    private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
78
    private static final String GENUS_HYBRID = "genus_hybrid";
79
    private static final String GENUS = "genus";
80
    private static final String SPECIES_HYBRID = "species_hybrid";
81
    private static final String SPECIES = "species";
82

    
83
    private static final String infraspecific_rank = "infraspecific_rank";
84
    private static final String infraspecies = "infraspecies";
85

    
86
    private static final String parenthetical_author = "parenthetical_author";
87
    private static final String primary_author = "primary_author";
88
    private static final String publication_author = "publication_author";
89
    private static final String place_of_publication = "place_of_publication";
90
    private static final String volume_and_page = "volume_and_page";
91
    private static final String KewYear4CDM = "KewYear4CDM";
92
    private static final String PubTypeABSG = "PubTypeABSG";
93
    private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
94

    
95
    private static final Map<String, UUID> nameMap = new HashMap<>();
96
    private static final Map<String, UUID> taxonMap = new HashMap<>();
97

    
98
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
99
            CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
100
            Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
101
            SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
102
            parenthetical_author, primary_author, publication_author, place_of_publication,
103
            volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
104
    });
105

    
106
    private Reference sourceReference;
107
    private Reference secReference;
108

    
109
    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
110

    
111
//    @Override
112
//    protected String getWorksheetName(CONFIG config) {
113
//        return "valid taxa names";
114
//    }
115

    
116
    @Override
117
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
118

    
119
        String line = getLine(state, 50);
120
//        System.out.println(line);
121
        Map<String, String> record = state.getOriginalRecord();
122

    
123
        Set<String> keys = record.keySet();
124
        for (String key: keys) {
125
            if (! expectedKeys.contains(key)){
126
                logger.warn(line + "Unexpected Key: " + key);
127
            }
128
        }
129

    
130
        makeTaxon(state, line, record);
131
    }
132

    
133
    private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
134
//        state.getTransactionStatus().flush();
135
        Reference sec = getSecReference(state, record);
136

    
137
        //name
138
        TaxonName existingName = getExistingName(state, line);
139
        if (existingName != null){
140
            verifyName(state, existingName, record, line, false);
141
        }else{
142
            existingName = createName(state, line);
143
        }
144

    
145
        //taxon
146
        TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
147

    
148
        if (taxonBase != null){
149
            getTaxonService().saveOrUpdate(taxonBase);
150
        }
151

    
152
        return;
153
    }
154

    
155
    private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
156
        //parse
157
        String fullTitle = getValue(state, Kew_Name_Citation);
158
        String kewNameId = getValue(state, Kew_Name_ID);
159

    
160
//        fullTitle = replaceBookSectionAuthor(state, fullTitle);
161

    
162
        TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
163
        handleBookSectionAuthor(newName, state, line);
164

    
165
        putName(kewNameId, newName.getUuid(), line);
166
        //name status
167
        makeNameStatus(line, state.getOriginalRecord(), newName);
168
        verifyName(state, newName, state.getOriginalRecord(), line, true);
169
        //deduplication
170
        replaceNameAuthorsAndReferences(state, newName);
171
        newName.addSource(makeOriginalSource(state));
172
        getNameService().saveOrUpdate(newName);
173
        //Kew-Nomencl-Status
174
        return newName;
175
    }
176

    
177
    private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
178
        String type = getValue(state, PubTypeABSG);
179
        if ("BS".equals(type)){
180
            Reference book = newName.getNomenclaturalReference();
181
            String pubAuthor = getValue(state, publication_author);
182
            if (book != null && StringUtils.isNotEmpty(pubAuthor)){
183
                TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
184
                Reference bookSection = ReferenceFactory.newBookSection();
185
                bookSection.setAuthorship(book.getAuthorship());
186
                book.setAuthorship(bookAuthor);
187
                bookSection.setInReference(book);
188
                bookSection.setDatePublished(book.getDatePublished());
189
                newName.setNomenclaturalReference(bookSection);
190
            }else{
191
                logger.warn(line + "unexpected booksection author handling");
192
            }
193
        }
194
    }
195

    
196
    private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
197
        TeamOrPersonBase<?> result;
198
        String ed = "";
199
        if (pubAuthor.endsWith(" (ed.)")){
200
            ed = " (ed.)";
201
        }else if (pubAuthor.endsWith(" (eds.)")){
202
            ed = " (eds.)";
203
        }
204
        pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
205
        String[] splits = pubAuthor.split("(, | & )");
206
        if (splits.length > 1){
207
            Team team = Team.NewInstance();
208
            result = team;
209
            for (String split : splits){
210
                if ("al.".equals(split.trim())){
211
                    team.setHasMoreMembers(true);
212
                }else{
213
                    team.addTeamMember(getPerson(split, line));
214
                }
215
            }
216
        }else{
217
            result = getPerson(splits[0], line);
218
        }
219
        if (ed.length() > 0){
220
            result.setTitleCache(result.getTitleCache() + ed, true);
221
        }
222
        return result;
223
    }
224

    
225
    private Person getPerson(String personStr, String line) {
226
        Person result = Person.NewInstance();
227
        String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
228
//        regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
229
        Matcher matcher = Pattern.compile(regEx).matcher(personStr);
230
        if (matcher.matches()){
231
            String famName = matcher.group("famname");
232
            result.setFamilyName(famName);
233
            String initials = personStr.replace(famName,"").trim();
234
            result.setInitials(initials);
235
        }else{
236
            result.setTitleCache(personStr, true);
237
            logger.warn(line + "BookSection author could not be parsed: " +  personStr);
238
        }
239
        return result;
240
    }
241

    
242
    private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
243
        String type = getValue(state, PubTypeABSG);
244
        if ("BS".equals(type)){
245
            String pubAuthor = getValue(state, publication_author);
246
            int inIndex = fullTitle.indexOf(" in ");
247
            int commaIndex = fullTitle.indexOf(", ");
248

    
249
        }
250
        return fullTitle;
251
    }
252

    
253
    private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
254
            Map<String, String> record, String line, boolean isNew) {
255
        if (isNew){
256
            boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
257
            if (!parsed){
258
                return;
259
            }
260
        }
261
        String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
262
        verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
263
        verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
264
        verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
265
        String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
266
        verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
267
        String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
268
        verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
269

    
270
        //reference
271
        Reference nomRef = taxonName.getNomenclaturalReference();
272
        if (nomRef == null){
273
            logger.warn(line + "no nom.ref. exists in existing name");
274
        }else{
275

    
276
            //place of publication
277
            boolean hasInRef = nomRef.getInReference() != null;
278
            String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
279
                    nomRef.getInReference().getAbbrevTitle() :
280
                    nomRef.getAbbrevTitle();
281
            String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
282
            //author
283
            String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
284
            verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
285
            //vol and page
286
            String existingVolume = getVolume(nomRef);
287
            String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
288
            verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
289
            //year
290
            verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
291
            //pub type
292
            verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
293
        }
294
    }
295

    
296
    private String getVolume(Reference nomRef) {
297
        Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
298
        String vol = ref.getVolume();
299
        String edition = ref.getEdition();
300
        if (StringUtils.isNotBlank(edition)){
301
            edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
302
        }
303
        String series = ref.getSeriesPart();
304
        if (StringUtils.isNotBlank(series)){
305
            series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
306
        }
307

    
308
        return vol;
309
    }
310

    
311
    private boolean isNumber(String edition) {
312
        try {
313
            Integer.valueOf(edition);
314
        } catch (NumberFormatException e) {
315
            return false;
316
        }
317
        return true;
318
    }
319

    
320
    private String authorAndExAuthor(TeamOrPersonBase<?> author,
321
            TeamOrPersonBase<?> exAuthor) {
322
        return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
323
                + author.getNomenclaturalTitleCache();
324
    }
325

    
326
    private String replaceStatus(String fullTitleCache) {
327
        return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
328
    }
329

    
330
    private String abbrefRefType(ReferenceType type) {
331
        return type == ReferenceType.Article ? "A" :
332
            type == ReferenceType.Book ? "B" :
333
            type == ReferenceType.BookSection ? "BS" :
334
            type == ReferenceType.Generic ? "GEN" :
335
            type.getLabel() ;
336
    }
337

    
338
    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
339
        return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
340
    }
341

    
342
    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
343
            String noLogIf, String noLogIf2, boolean isNew) {
344
        String value = getValue(record, fieldName);
345
        if (!CdmUtils.nullSafeEqual(expectedValue, value)){
346
            String diff = singleDiff(expectedValue, value);
347
            String label = isNew ? "New     " : "Existing";
348
            if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
349
                System.out.println("   " + line + fieldName + "\n        "+label+": " + expectedValue + "\n        Kew     : " + value);
350
            }
351
            return diff;
352
        }else{
353
            return "";
354
        }
355
    }
356

    
357
    private String singleDiff(String expectedValue, String value) {
358
        if (expectedValue == null){
359
            return CdmUtils.Nz(value);
360
        }else if (value == null){
361
            return CdmUtils.Nz(expectedValue);
362
        }
363
        expectedValue = expectedValue.trim();
364
        value = value.trim();
365
        String diff_ab = StringUtils.difference(expectedValue, value);
366
        String diff_ba = StringUtils.difference(value, expectedValue);
367
        if (diff_ab.endsWith(diff_ba)){
368
            return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
369
        }else if (diff_ba.endsWith(diff_ab)){
370
            return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
371
        }else{
372
            return NO_SIMPLE_DIFF;
373
        }
374
    }
375

    
376
    private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
377
        String cdmNameUuid = getValue(state, CDM_Name_UUID);
378
        String kewNameId = getValue(state, Kew_Name_ID);
379
        if (cdmNameUuid == null){
380
            return null;
381
        }
382
        TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
383
        if (existingName != null){
384
            putName(kewNameId, existingName.getUuid(), line);
385
            return CdmBase.deproxy(existingName);
386
        }else{
387
            return null;
388
        }
389
    }
390

    
391
    private void putName(String kewNameId, UUID uuid, String line) {
392
        UUID existingUuid = nameMap.put(kewNameId, uuid);
393
        if (existingUuid != null){
394
            logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
395
        }
396
    }
397

    
398

    
399
    private void makeNameStatus(String line, Map<String, String> record,
400
            TaxonName taxonName) {
401
        String nameStatus = getValue(record, Kew_Nomencl_Status);
402
        NomenclaturalStatusType status;
403
        if (isBlank(nameStatus)){
404
            status = null;
405
        }else if ("Illegitimate".equals(nameStatus)){
406
            status = NomenclaturalStatusType.ILLEGITIMATE();
407
        }else if ("Invalid".equals(nameStatus)){
408
            status = NomenclaturalStatusType.INVALID();
409
        }else{
410
            logger.warn(line + "Nom. status not recognized: " + nameStatus);
411
            status = null;
412
        }
413
        if (status != null){
414
            taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
415
        }
416
    }
417

    
418

    
419
    private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
420
            Map<String, String> record, TaxonName taxonName, Reference sec) {
421

    
422
        TaxonBase<?> taxonBase;
423
        boolean isUnplaced = false;
424
        String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
425

    
426
        if ("Accepted".equals(taxStatusStr)){
427
            taxonBase = Taxon.NewInstance(taxonName, sec);
428
        }else if ("Synonym".equals(taxStatusStr)){
429
            taxonBase = Synonym.NewInstance(taxonName, sec);
430
        }else if ("Artificial Hybrid".equals(taxStatusStr)){
431
            taxonBase = Synonym.NewInstance(taxonName, sec);
432
        }else if ("Unplaced".equals(taxStatusStr)){
433
            taxonBase = Taxon.NewInstance(taxonName, sec);
434
        }else{
435
            logger.warn(line + "Status not handled: " + taxStatusStr);
436
            return null;
437
        }
438
        taxonBase.addSource(makeOriginalSource(state));
439
        taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
440
        if (taxonBase instanceof Taxon){
441
            UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
442
            if (existing == null || !isUnplaced){
443
                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
444
            }else if (!isUnplaced){
445
                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
446
                System.out.println("  " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
447
            }
448
        }
449
        return taxonBase;
450
    }
451

    
452
    int c2 = 0;
453
    @Override
454
    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
455

    
456
        String kewId = getValue(state, Kew_Name_ID) + ": ";
457
        String line = " (line: " + state.getCurrentLine() + ")";
458
//        System.out.println(line);
459
        if (c2++ % 100 == 0){
460
            this.commitTransaction(state.getTransactionStatus());
461
            this.classification = null;
462
            this.secReference = null;
463
            this.sourceReference = null;
464
            this.orphanedSynonymTaxon = null;
465
            TransactionStatus tx = this.startTransaction();
466
            state.setTransactionStatus(tx);
467
            logger.info(line + "New transaction started.");
468
        }
469
        Map<String, String> record = state.getOriginalRecord();
470

    
471
        Classification classification = getClassification(state);
472
        TaxonBase<?> taxonBase = getTaxon(record);
473
        TaxonName taxonName = taxonBase.getName();
474

    
475
        if (taxonBase.isInstanceOf(Taxon.class)){
476
            Taxon parent = getParent(record, taxonName, line, kewId);
477
            if (parent != null){
478
                classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
479
            }
480
        }else if (taxonBase.isInstanceOf(Synonym.class)){
481
            Taxon taxon = getAcceptedTaxon(record, line, kewId);
482
            if (taxon == null){
483
                taxon = getOrphanedSynonymTaxon(state);
484
                logger.warn(kewId + "Accepted taxon not found. Added synonym to 'orphaned synonym taxon': " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
485
            }
486
            taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF);
487
        }else{
488
            logger.warn("Unhandled");
489
        }
490

    
491
        String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
492
        if (basionymId != null){
493
            UUID basionymUuid = nameMap.get(basionymId);
494
            TaxonName basionym = getNameService().find(basionymUuid);
495
            if(basionym == null){
496
                logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
497
            }else{
498
                taxonName.addBasionym(basionym);
499
                taxonName.mergeHomotypicGroups(basionym);  //just in case this is not automatically done
500
                //TODO
501
                //          adjustSynonymType(taxonBase, basionymTaxon, line);
502
            }
503
        }
504

    
505
    }
506

    
507
    Taxon orphanedSynonymTaxon;
508
    private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
509
        if (orphanedSynonymTaxon != null) {
510
            return orphanedSynonymTaxon;
511
        }
512
        UUID orphanedTaxonUuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
513
        orphanedSynonymTaxon = CdmBase.deproxy(getTaxonService().find(orphanedTaxonUuid), Taxon.class);
514
        if (orphanedSynonymTaxon == null){
515
            TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
516
            placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
517
            orphanedSynonymTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
518
            orphanedSynonymTaxon.setUuid(orphanedTaxonUuid);
519
            Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
520
            TaxonNode orphandNode = getClassification(state).addParentChild(unplacedTaxon, orphanedSynonymTaxon, null, null);
521
            getTaxonNodeService().save(orphandNode);
522
        }
523
        return orphanedSynonymTaxon;
524
    }
525

    
526
    private Classification classification;
527
    private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
528
        if (classification == null){
529
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
530
        }
531
        return classification;
532
    }
533

    
534
    private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
535
        String statusStr = getValue(record, Kew_Taxonomic_Status);
536
        if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
537
            String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
538
            UUID accUuid = taxonMap.get(accKewId);
539
            TaxonBase<?> accBase = getTaxonService().find(accUuid);
540
            if (accBase == null){
541
                logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
542
                return null;
543
            }else if (accBase.isInstanceOf(Synonym.class)){
544
                logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
545
                return null;
546
            }else{
547
                return CdmBase.deproxy(accBase, Taxon.class);
548
            }
549
        }else{
550
            logger.warn(kewId + "Parent not retrieved" +  line);
551
            return null;
552
        }
553
    }
554

    
555
    private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
556
        String statusStr = getValue(record, Kew_Taxonomic_Status);
557
        if ("Unplaced".equals(statusStr)){
558
            return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
559
        }else if ("Artificial Hybrid".equals(statusStr)){
560
            return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
561
        }else if ("Accepted".equals(statusStr)){
562
            String higherName = getHigherRankName(taxonName);
563
            UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
564
            if (parentTaxonUuid != null){
565
                TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
566
                if (parentBase == null){
567
                    return null;
568
                } else if (parentBase.isInstanceOf(Taxon.class)){
569
                    Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
570
                    return parentTaxon;
571
                } else {
572
                    logger.warn(kewId + "Parent is synonym " + line);
573
                    return null;
574
                }
575
            }else{
576
                return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
577
            }
578
        }else if ("Synonym".equals(statusStr)){
579
            //not relevant
580
            return null;
581
        }else{
582
            logger.warn(kewId + "Parent not retrieved" + line);
583
            return null;
584
        }
585
    }
586

    
587
    private String getHigherRankName(TaxonName taxonName) {
588
        if (Rank.SPECIES().equals(taxonName.getRank())){
589
            return taxonName.getGenusOrUninomial();
590
        }else if (taxonName.isInfraSpecific()){
591
            return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
592
        }
593
        return null;
594
    }
595

    
596
    private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
597
        adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
598
        adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
599
    }
600

    
601
    private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
602
        if (firstTaxon == null){
603
            logger.warn(line + "first taxon is null for adjust synonym type");
604
        }else if (secondTaxon == null){
605
            logger.warn(line + "second taxon is null for adjust synonym type");
606
        }else if (secondTaxon.isInstanceOf(Synonym.class)){
607
            Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
608
            if (firstTaxon.equals(syn.getAcceptedTaxon())){
609
                syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF);
610
            }
611
        }
612
    }
613

    
614
    protected TaxonBase<?> getTaxon(Map<String, String> record) {
615
        String kew_name_id = getValue(record, Kew_Name_ID);
616
        UUID taxonUuid = taxonMap.get(kew_name_id);
617
        TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
618
        return taxon;
619
    }
620

    
621
	private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
622
		boolean result = true;
623
	    if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
624
			logger.warn(line + "Name could not be parsed: " + fullName);
625
			result = false;
626
		}
627
		Reference nomRef = name.getNomenclaturalReference();
628
		if (nomRef != null && (nomRef.isProtectedTitleCache()
629
		        || nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
630
		    logger.warn(line + "Nom ref could not be parsed: " + fullName);
631
            result = false;
632
		}
633
		if (nameStr != null && !name.getTitleCache().equals(nameStr)){
634
            logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
635
            result = false;
636
        }
637
		return result;
638
	}
639

    
640
    private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
641
        if (this.secReference == null){
642
            logger.warn("Load sec ref");
643
            String secUuid = record.get(Sec_Ref_CDM_UUID);
644
            secReference = getReferenceService().load(UUID.fromString(secUuid));
645
            if (this.secReference == null){
646
                logger.warn("Sec ref is null");
647
            }
648
        }
649
        return this.secReference;
650
    }
651

    
652
    private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
653
        if (this.sourceReference == null){
654
            this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
655
        }
656
        return this.sourceReference;
657
    }
658

    
659
    private Reference getPersistentReference(Reference reference) {
660
        Reference result = getReferenceService().find(reference.getUuid());
661
        logger.warn("Loaded persistent reference: "+ reference.getUuid());
662
        if (result == null){
663
            logger.warn("Persistent reference is null: " + reference.getUuid());
664
            result = reference;
665
            getReferenceService().saveOrUpdate(result);
666
        }
667
        return result;
668
    }
669

    
670
    private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
671
        state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
672
    }
673

    
674

    
675
    @Override
676
    protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
677
    	String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
678
        return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
679
    }
680
}
(8-8/9)