Project

General

Profile

« Previous | Next » 

Revision a881d4d7

Added by Andreas Müller 4 months ago

ref #9918 first version of caryophyllaceae import

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/caryophyllales/CaryophyllaceaeActivator.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.caryophyllales;
10

  
11
import java.util.UUID;
12

  
13
import org.apache.log4j.Logger;
14

  
15
import eu.etaxonomy.cdm.app.berlinModelImport.SourceBase;
16
import eu.etaxonomy.cdm.app.common.CdmDestinations;
17
import eu.etaxonomy.cdm.common.URI;
18
import eu.etaxonomy.cdm.database.DbSchemaValidation;
19
import eu.etaxonomy.cdm.database.ICdmDataSource;
20
import eu.etaxonomy.cdm.io.caryo.KewExcelTaxonImportConfigurator;
21
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
22
import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
23
import eu.etaxonomy.cdm.model.reference.Reference;
24
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
25

  
26
/**
27
 * @author a.mueller
28
 * @since 05.01.2022
29
 */
30
public class CaryophyllaceaeActivator extends SourceBase{
31

  
32
	@SuppressWarnings("unused")
33
    private static final Logger logger = Logger.getLogger(CaryophyllaceaeActivator.class);
34

  
35
	//database validation status (create, update, validate ...)
36
	static final DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
37
	static final URI source = caryophyllaceae();
38

  
39

  
40
//	static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
41
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_caryo_spp();
42

  
43
	//classification
44
	static final UUID classificationUuid = UUID.fromString("9edc58b5-de3b-43aa-9f31-1ede7c009c2b");
45

  
46
	//check - import
47
	static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
48

  
49
	//taxa
50
	static final boolean doTaxa = true;
51

  
52
	private void doImport(ICdmDataSource cdmDestination){
53

  
54
		//make Source
55
	    KewExcelTaxonImportConfigurator config= KewExcelTaxonImportConfigurator.NewInstance(source, cdmDestination);
56
		config.setClassificationUuid(classificationUuid);
57
		config.setCheck(check);
58
//		config.setDoTaxa(doTaxa);
59
		config.setDbSchemaValidation(hbm2dll);
60
		config.setSourceReferenceTitle("WCVP2CDM-Caryophyllaceae.xlsx");
61

  
62
		CdmDefaultImport<KewExcelTaxonImportConfigurator> myImport = new CdmDefaultImport<>();
63

  
64
		//...
65
		if (true){
66
			System.out.println("Start import from ("+ source.toString() + ") ...");
67
			config.setSourceReference(getSourceReference(config.getSourceReferenceTitle()));
68
			myImport.invoke(config);
69
			System.out.println("End import from ("+ source.toString() + ")...");
70
		}
71
	}
72

  
73
	private Reference getSourceReference(String string) {
74
		Reference result = ReferenceFactory.newGeneric();
75
		result.setTitleCache(string, true);
76
		return result;
77
	}
78

  
79

  
80
	public static URI caryophyllaceae(){
81
      String fileName = "WCVP2CDM-Caryophyllaceae.xlsx";
82
      URI uri = URI.create("file:////BGBM-PESIHPC/Caryophyllales/" +  fileName);
83
      return uri;
84
	}
85

  
86
	public static void main(String[] args) {
87
		CaryophyllaceaeActivator me = new CaryophyllaceaeActivator();
88
		me.doImport(cdmDestination);
89
		System.exit(0);
90
	}
91
}
app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
590 590
    public static ICdmDataSource cdm_local_caryo_spp(){
591 591
        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
592 592
        String cdmServer = "127.0.0.1";
593
        String cdmDB = "cdm_caryo_spp";
593
        String cdmDB = "cdm_local_caryophyllales_spp";
594 594
        String cdmUserName = "edit";
595 595
        return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
596 596
    }
app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImport.java
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

  
11
import java.util.Arrays;
12
import java.util.HashMap;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17
import java.util.regex.Matcher;
18
import java.util.regex.Pattern;
19

  
20
import org.apache.commons.lang3.StringUtils;
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23
import org.springframework.transaction.TransactionStatus;
24

  
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.CdmBase;
32
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
33
import eu.etaxonomy.cdm.model.name.INonViralName;
34
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
35
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
37
import eu.etaxonomy.cdm.model.name.Rank;
38
import eu.etaxonomy.cdm.model.name.TaxonName;
39
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
40
import eu.etaxonomy.cdm.model.reference.Reference;
41
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
42
import eu.etaxonomy.cdm.model.reference.ReferenceType;
43
import eu.etaxonomy.cdm.model.taxon.Classification;
44
import eu.etaxonomy.cdm.model.taxon.Synonym;
45
import eu.etaxonomy.cdm.model.taxon.SynonymType;
46
import eu.etaxonomy.cdm.model.taxon.Taxon;
47
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
48
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
49

  
50
/**
51
 * Kew excel taxon import for Caryophyllaceae.
52
 *
53
 * @author a.mueller
54
 * @since 05.01.2022
55
 */
56
@Component
57
public class KewExcelTaxonImport<CONFIG extends KewExcelTaxonImportConfigurator>
58
            extends SimpleExcelTaxonImport<CONFIG>{
59

  
60
    private static final long serialVersionUID = 1081966876789613803L;
61
    private static final Logger logger = Logger.getLogger(KewExcelTaxonImport.class);
62

  
63
    private static final String NO_SIMPLE_DIFF = "xxxxx";
64

  
65
    private static final String KEW_UNPLACED_NODE = "82a9e3a1-2519-402a-b3c9-ec4c1fddf4d0";
66
    private static final String KEW_ACCEPTED_NODE = "b44da8af-6ad8-4b41-98cd-8f4c1a1bd00c";
67
    private static final String KEW_ORPHANED_PLACEHOLDER_TAXON = "dccac79b-a967-49ed-b153-5faa83194060";
68

  
69
    private static final String CDM_Name_UUID = "CDM-Name_UUID";
70
    private static final String Kew_Name_ID = "Kew-Name-ID";
71
    private static final String Kew_Name_Citation = "Kew-Name-Citation";
72
    private static final String Kew_Taxonomic_Status = "Kew-Taxonomic-Status";
73
    private static final String Kew_Nomencl_Status = "Kew-Nomencl-Status";
74
    private static final String Kew_Rel_Acc_Name_ID = "Kew-Rel-Acc-Name-ID";
75
    private static final String Kew_Rel_Basionym_Name_ID = "Kew-Rel-Basionym-Name-ID";
76
    private static final String GENUS_HYBRID = "genus_hybrid";
77
    private static final String GENUS = "genus";
78
    private static final String SPECIES_HYBRID = "species_hybrid";
79
    private static final String SPECIES = "species";
80

  
81
    private static final String infraspecific_rank = "infraspecific_rank";
82
    private static final String infraspecies = "infraspecies";
83

  
84
    private static final String parenthetical_author = "parenthetical_author";
85
    private static final String primary_author = "primary_author";
86
    private static final String publication_author = "publication_author";
87
    private static final String place_of_publication = "place_of_publication";
88
    private static final String volume_and_page = "volume_and_page";
89
    private static final String KewYear4CDM = "KewYear4CDM";
90
    private static final String PubTypeABSG = "PubTypeABSG";
91
    private static final String Sec_Ref_CDM_UUID = "Sec-Ref-CDM-UUID";
92

  
93
    private static final Map<String, UUID> nameMap = new HashMap<>();
94
    private static final Map<String, UUID> taxonMap = new HashMap<>();
95

  
96
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
97
            CDM_Name_UUID, Kew_Name_ID, Kew_Name_Citation, Kew_Taxonomic_Status,
98
            Kew_Nomencl_Status, Kew_Rel_Acc_Name_ID, Kew_Rel_Basionym_Name_ID, GENUS_HYBRID, GENUS,
99
            SPECIES_HYBRID, SPECIES, infraspecific_rank, infraspecies,
100
            parenthetical_author, primary_author, publication_author, place_of_publication,
101
            volume_and_page, KewYear4CDM, PubTypeABSG, Sec_Ref_CDM_UUID
102
    });
103

  
104
    private Reference sourceReference;
105
    private Reference secReference;
106

  
107
    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
108

  
109
//    @Override
110
//    protected String getWorksheetName(CONFIG config) {
111
//        return "valid taxa names";
112
//    }
113

  
114
    @Override
115
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
116

  
117
        String line = getLine(state, 50);
118
        System.out.println(line);
119
        Map<String, String> record = state.getOriginalRecord();
120

  
121
        Set<String> keys = record.keySet();
122
        for (String key: keys) {
123
            if (! expectedKeys.contains(key)){
124
                logger.warn(line + "Unexpected Key: " + key);
125
            }
126
        }
127

  
128
        makeTaxon(state, line, record);
129
    }
130

  
131
    private void makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record) {
132
//        state.getTransactionStatus().flush();
133
        Reference sec = getSecReference(state, record);
134

  
135
        //name
136
        TaxonName existingName = getExistingName(state, line);
137
        if (existingName != null){
138
            verifyName(state, existingName, record, line, false);
139
        }else{
140
            existingName = createName(state, line);
141
        }
142

  
143
        //taxon
144
        TaxonBase<?> taxonBase = makeTaxonBase(state, line, record, existingName, sec);
145

  
146
        if (taxonBase != null){
147
            getTaxonService().saveOrUpdate(taxonBase);
148
        }
149

  
150
        return;
151
    }
152

  
153
    private TaxonName createName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
154
        //parse
155
        String fullTitle = getValue(state, Kew_Name_Citation);
156
        String kewNameId = getValue(state, Kew_Name_ID);
157

  
158
        fullTitle = replaceBookSectionAuthor(state, fullTitle);
159

  
160
        TaxonName newName = parser.parseReferencedName(fullTitle, NomenclaturalCode.ICNAFP, Rank.SPECIES());
161
        handleBookSectionAuthor(newName, state, line);
162

  
163
        putName(kewNameId, newName.getUuid(), line);
164
        //name status
165
        makeNameStatus(line, state.getOriginalRecord(), newName);
166
        verifyName(state, newName, state.getOriginalRecord(), line, true);
167
        //deduplication
168
        replaceNameAuthorsAndReferences(state, newName);
169
        newName.addSource(makeOriginalSource(state));
170
        getNameService().saveOrUpdate(newName);
171
        //Kew-Nomencl-Status
172
        return newName;
173
    }
174

  
175
    private void handleBookSectionAuthor(TaxonName newName, SimpleExcelTaxonImportState<CONFIG> state, String line) {
176
        String type = getValue(state, PubTypeABSG);
177
        if ("BS".equals(type)){
178
            Reference book = newName.getNomenclaturalReference();
179
            String pubAuthor = getValue(state, publication_author);
180
            if (book != null && StringUtils.isNotEmpty(pubAuthor)){
181
                TeamOrPersonBase<?> bookAuthor = parseBookSectionAuthor(pubAuthor, line);
182
                Reference bookSection = ReferenceFactory.newBookSection();
183
                bookSection.setAuthorship(book.getAuthorship());
184
                book.setAuthorship(bookAuthor);
185
                bookSection.setInReference(book);
186
                bookSection.setDatePublished(book.getDatePublished());
187
                newName.setNomenclaturalReference(bookSection);
188
            }else{
189
                logger.warn(line + "unexpected booksection author handling");
190
            }
191
        }
192
    }
193

  
194
    private TeamOrPersonBase<?> parseBookSectionAuthor(String pubAuthor, String line) {
195
        TeamOrPersonBase<?> result;
196
        String ed = "";
197
        if (pubAuthor.endsWith(" (ed.)")){
198
            ed = " (ed.)";
199
        }else if (pubAuthor.endsWith(" (eds.)")){
200
            ed = " (eds.)";
201
        }
202
        pubAuthor = pubAuthor.substring(0, pubAuthor.length() - ed.length());
203
        String[] splits = pubAuthor.split("(, | & )");
204
        if (splits.length > 1){
205
            Team team = Team.NewInstance();
206
            result = team;
207
            for (String split : splits){
208
                if ("al.".equals(split.trim())){
209
                    team.setHasMoreMembers(true);
210
                }else{
211
                    team.addTeamMember(getPerson(split, line));
212
                }
213
            }
214
        }else{
215
            result = getPerson(splits[0], line);
216
        }
217
        if (ed.length() > 0){
218
            result.setTitleCache(result.getTitleCache() + ed, true);
219
        }
220
        return result;
221
    }
222

  
223
    private Person getPerson(String personStr, String line) {
224
        Person result = Person.NewInstance();
225
        String regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?(?<famname>[A-Z][a-zèéöü]+((\\-|\\s(i|de)?\\s*)[A-Z][a-zèéü]+)?)";
226
//        regEx = "([A-ZÉ]\\.\\-?)+((de|von)\\s)?Boissier";
227
        Matcher matcher = Pattern.compile(regEx).matcher(personStr);
228
        if (matcher.matches()){
229
            String famName = matcher.group("famname");
230
            result.setFamilyName(famName);
231
            String initials = personStr.replace(famName,"").trim();
232
            result.setInitials(initials);
233
        }else{
234
            result.setTitleCache(personStr, true);
235
            logger.warn(line + "BookSection author could not be parsed: " +  personStr);
236
        }
237
        return result;
238
    }
239

  
240
    private String replaceBookSectionAuthor(SimpleExcelTaxonImportState<CONFIG> state, String fullTitle) {
241
        String type = getValue(state, PubTypeABSG);
242
        if ("BS".equals(type)){
243
            String pubAuthor = getValue(state, publication_author);
244
            int inIndex = fullTitle.indexOf(" in ");
245
            int commaIndex = fullTitle.indexOf(", ");
246

  
247
        }
248
        return fullTitle;
249
    }
250

  
251
    private void verifyName(SimpleExcelTaxonImportState<CONFIG> state, TaxonName taxonName,
252
            Map<String, String> record, String line, boolean isNew) {
253
        if (isNew){
254
            boolean parsed = checkParsed(taxonName, getValue(state, Kew_Name_Citation), null, line);
255
            if (!parsed){
256
                return;
257
            }
258
        }
259
        String fullDiff = verifyField(replaceStatus(taxonName.getFullTitleCache()), record, Kew_Name_Citation, line, null, isNew);
260
        verifyField(taxonName.getGenusOrUninomial(), record, GENUS, line, null, isNew);
261
        verifyField(taxonName.getSpecificEpithet(), record, SPECIES, line, null, isNew);
262
        verifyField(taxonName.getInfraSpecificEpithet(), record, infraspecies, line, null, isNew);
263
        String existingBasionymAuthor = authorAndExAuthor(taxonName.getBasionymAuthorship(), taxonName.getExBasionymAuthorship());
264
        verifyField(existingBasionymAuthor, record, parenthetical_author, line, null, isNew);
265
        String existingCombinationAuthor = authorAndExAuthor(taxonName.getCombinationAuthorship(), taxonName.getExCombinationAuthorship());
266
        verifyField(existingCombinationAuthor, record, primary_author, line, null, isNew);
267

  
268
        //reference
269
        Reference nomRef = taxonName.getNomenclaturalReference();
270
        if (nomRef == null){
271
            logger.warn(line + "no nom.ref. exists in existing name");
272
        }else{
273

  
274
            //place of publication
275
            boolean hasInRef = nomRef.getInReference() != null;
276
            String existingAbbrevTitle = hasInRef && (nomRef.getType() == ReferenceType.BookSection || nomRef.getType() == ReferenceType.Article) ?
277
                    nomRef.getInReference().getAbbrevTitle() :
278
                    nomRef.getAbbrevTitle();
279
            String diffPlacePub = verifyField(existingAbbrevTitle, record, place_of_publication, line, fullDiff, isNew);
280
            //author
281
            String inRefAuthor = (!hasInRef || nomRef.getInReference().getAuthorship() == null) ? null : nomRef.getInReference().getAuthorship().getTitleCache();
282
            verifyField(inRefAuthor, record, publication_author, line, fullDiff, isNew);
283
            //vol and page
284
            String existingVolume = getVolume(nomRef);
285
            String existingVolAndPage = CdmUtils.Nz(existingVolume) + ": " + CdmUtils.Nz(taxonName.getNomenclaturalSource().getCitationMicroReference());
286
            verifyField(existingVolAndPage, record, volume_and_page, line, fullDiff, diffPlacePub, isNew);
287
            //year
288
            verifyField(nomRef.getYear(), record, KewYear4CDM, line, fullDiff, isNew);
289
            //pub type
290
            verifyField(abbrefRefType(nomRef.getType()), record, PubTypeABSG, line, null, isNew);
291
        }
292
    }
293

  
294
    private String getVolume(Reference nomRef) {
295
        Reference ref = nomRef.isBookSection()? nomRef.getInReference(): nomRef;
296
        String vol = ref.getVolume();
297
        String edition = ref.getEdition();
298
        if (StringUtils.isNotBlank(edition)){
299
            edition = ", " + (isNumber(edition)? "ed. ":"") + edition + ",";
300
        }
301
        String series = ref.getSeriesPart();
302
        if (StringUtils.isNotBlank(series)){
303
            series = ", " + (isNumber(series)? "ser. ":"") + series + ",";
304
        }
305

  
306
        return vol;
307
    }
308

  
309
    private boolean isNumber(String edition) {
310
        try {
311
            Integer.valueOf(edition);
312
        } catch (NumberFormatException e) {
313
            return false;
314
        }
315
        return true;
316
    }
317

  
318
    private String authorAndExAuthor(TeamOrPersonBase<?> author,
319
            TeamOrPersonBase<?> exAuthor) {
320
        return author == null? null : (exAuthor != null ? (exAuthor.getNomenclaturalTitleCache() + " ex "): "")
321
                + author.getNomenclaturalTitleCache();
322
    }
323

  
324
    private String replaceStatus(String fullTitleCache) {
325
        return fullTitleCache.replaceAll(", nom\\. inval\\.$", "").replaceAll(", nom\\. illeg\\.$", "");
326
    }
327

  
328
    private String abbrefRefType(ReferenceType type) {
329
        return type == ReferenceType.Article ? "A" :
330
            type == ReferenceType.Book ? "B" :
331
            type == ReferenceType.BookSection ? "BS" :
332
            type == ReferenceType.Generic ? "GEN" :
333
            type.getLabel() ;
334
    }
335

  
336
    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line, String noLogIf, boolean isNew) {
337
        return verifyField(expectedValue, record, fieldName, line, noLogIf, null, isNew);
338
    }
339

  
340
    private String verifyField(String expectedValue, Map<String, String> record, String fieldName, String line,
341
            String noLogIf, String noLogIf2, boolean isNew) {
342
        String value = getValue(record, fieldName);
343
        if (!CdmUtils.nullSafeEqual(expectedValue, value)){
344
            String diff = singleDiff(expectedValue, value);
345
            String label = isNew ? "New     " : "Existing";
346
            if (!diff.equals(noLogIf) && !diff.equals(noLogIf2) || diff.equals(NO_SIMPLE_DIFF)){
347
                System.out.println("   " + line + fieldName + "\n        "+label+": " + expectedValue + "\n        Kew     : " + value);
348
            }
349
            return diff;
350
        }else{
351
            return "";
352
        }
353
    }
354

  
355
    private String singleDiff(String expectedValue, String value) {
356
        if (expectedValue == null){
357
            return CdmUtils.Nz(value);
358
        }else if (value == null){
359
            return CdmUtils.Nz(expectedValue);
360
        }
361
        expectedValue = expectedValue.trim();
362
        value = value.trim();
363
        String diff_ab = StringUtils.difference(expectedValue, value);
364
        String diff_ba = StringUtils.difference(value, expectedValue);
365
        if (diff_ab.endsWith(diff_ba)){
366
            return "+" + diff_ab.substring(0, diff_ab.length() - diff_ba.length());
367
        }else if (diff_ba.endsWith(diff_ab)){
368
            return "-" + diff_ba.substring(0, diff_ba.length() - diff_ab.length());
369
        }else{
370
            return NO_SIMPLE_DIFF;
371
        }
372
    }
373

  
374
    private TaxonName getExistingName(SimpleExcelTaxonImportState<CONFIG> state, String line) {
375
        String cdmNameUuid = getValue(state, CDM_Name_UUID);
376
        String kewNameId = getValue(state, Kew_Name_ID);
377
        if (cdmNameUuid == null){
378
            return null;
379
        }
380
        TaxonName existingName = getNameService().load(UUID.fromString(cdmNameUuid));
381
        if (existingName != null){
382
            putName(kewNameId, existingName.getUuid(), line);
383
            return CdmBase.deproxy(existingName);
384
        }else{
385
            return null;
386
        }
387
    }
388

  
389
    private void putName(String kewNameId, UUID uuid, String line) {
390
        UUID existingUuid = nameMap.put(kewNameId, uuid);
391
        if (existingUuid != null){
392
            logger.warn(line + "Kew-Name-id already exists: " + kewNameId);
393
        }
394
    }
395

  
396

  
397
    private void makeNameStatus(String line, Map<String, String> record,
398
            TaxonName taxonName) {
399
        String nameStatus = getValue(record, Kew_Nomencl_Status);
400
        NomenclaturalStatusType status;
401
        if (isBlank(nameStatus)){
402
            status = null;
403
        }else if ("Illegitimate".equals(nameStatus)){
404
            status = NomenclaturalStatusType.ILLEGITIMATE();
405
        }else if ("Invalid".equals(nameStatus)){
406
            status = NomenclaturalStatusType.INVALID();
407
        }else{
408
            logger.warn(line + "Nom. status not recognized: " + nameStatus);
409
            status = null;
410
        }
411
        if (status != null){
412
            taxonName.addStatus(NomenclaturalStatus.NewInstance(status));
413
        }
414
    }
415

  
416

  
417
    private TaxonBase<?> makeTaxonBase(SimpleExcelTaxonImportState<CONFIG> state, String line,
418
            Map<String, String> record, TaxonName taxonName, Reference sec) {
419

  
420
        TaxonBase<?> taxonBase;
421
        boolean isUnplaced = false;
422
        String taxStatusStr = getValue(record, Kew_Taxonomic_Status);
423

  
424
        if ("Accepted".equals(taxStatusStr)){
425
            taxonBase = Taxon.NewInstance(taxonName, sec);
426
        }else if ("Synonym".equals(taxStatusStr)){
427
            taxonBase = Synonym.NewInstance(taxonName, sec);
428
        }else if ("Artificial Hybrid".equals(taxStatusStr)){
429
            taxonBase = Synonym.NewInstance(taxonName, sec);
430
        }else if ("Unplaced".equals(taxStatusStr)){
431
            taxonBase = Taxon.NewInstance(taxonName, sec);
432
        }else{
433
            logger.warn(line + "Status not handled: " + taxStatusStr);
434
            return null;
435
        }
436
        taxonBase.addSource(makeOriginalSource(state));
437
        taxonMap.put(getValue(record, Kew_Name_ID), taxonBase.getUuid());
438
        if (taxonBase instanceof Taxon){
439
            UUID existing = taxonMap.get(taxonBase.getName().getNameCache());
440
            if (existing == null || !isUnplaced){
441
                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
442
            }else if (!isUnplaced){
443
                taxonMap.put(taxonBase.getName().getNameCache(), taxonBase.getUuid());
444
                System.out.println("  " + line + "There is more than 1 taxon with name: " + taxonBase.getName().getNameCache());
445
            }
446
        }
447
        return taxonBase;
448
    }
449

  
450
    int c2 = 0;
451
    @Override
452
    protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
453

  
454
        String kewId = getValue(state, Kew_Name_ID) + ": ";
455
        String line = " (line: " + state.getCurrentLine() + ")";
456
//        System.out.println(line);
457
        if (c2++ % 100 == 0){
458
            this.commitTransaction(state.getTransactionStatus());
459
            this.classification = null;
460
            this.secReference = null;
461
            this.sourceReference = null;
462
            TransactionStatus tx = this.startTransaction();
463
            state.setTransactionStatus(tx);
464
            logger.info(line + "New transaction started.");
465
        }
466
        Map<String, String> record = state.getOriginalRecord();
467

  
468
        Classification classification = getClassification(state);
469
        TaxonBase<?> taxonBase = getTaxon(record);
470
        TaxonName taxonName = taxonBase.getName();
471

  
472
        if (taxonBase.isInstanceOf(Taxon.class)){
473
            Taxon parent = getParent(record, taxonName, line, kewId);
474
            if (parent != null){
475
                classification.addParentChild(parent, CdmBase.deproxy(taxonBase, Taxon.class), null, null);
476
            }
477
        }else if (taxonBase.isInstanceOf(Synonym.class)){
478
            Taxon taxon = getAcceptedTaxon(record, line, kewId);
479
            if (taxon == null){
480
                logger.warn(kewId + "Accepted taxon not found: " + getValue(record, Kew_Rel_Acc_Name_ID) + line);
481
                taxon = getOrphanedSynonymTaxon(state);
482
            }else{
483
                taxon.addSynonym(CdmBase.deproxy(taxonBase, Synonym.class), SynonymType.SYNONYM_OF());
484
            }
485
        }else{
486
            logger.warn("Unhandled");
487
        }
488

  
489
        String basionymId = getValue(record, Kew_Rel_Basionym_Name_ID);
490
        if (basionymId != null){
491
            UUID basionymUuid = nameMap.get(basionymId);
492
            TaxonName basionym = getNameService().find(basionymUuid);
493
            if(basionym == null){
494
                logger.warn(kewId + "Basionym does not exist: " + basionymId + line);
495
            }else{
496
                taxonName.addBasionym(basionym);
497
                taxonName.mergeHomotypicGroups(basionym);  //just in case this is not automatically done
498
                //TODO
499
                //          adjustSynonymType(taxonBase, basionymTaxon, line);
500
            }
501
        }
502

  
503
    }
504

  
505
    private Taxon getOrphanedSynonymTaxon(SimpleExcelTaxonImportState<CONFIG> state) {
506
        UUID uuid = UUID.fromString(KEW_ORPHANED_PLACEHOLDER_TAXON);
507
        Taxon placeholderTaxon = CdmBase.deproxy(getTaxonService().find(uuid), Taxon.class);
508
        if (placeholderTaxon == null){
509
            TaxonName placeholderName = TaxonNameFactory.NewBacterialInstance(Rank.SUBFAMILY());
510
            placeholderName.setTitleCache("Orphaned_Synonyms_KEW", true);
511
            placeholderTaxon = Taxon.NewInstance(placeholderName, getSecReference(state, state.getOriginalRecord()));
512
            Taxon unplacedTaxon = CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
513
            getClassification(state).addParentChild(unplacedTaxon, placeholderTaxon, null, null);
514
        }
515
        return placeholderTaxon;
516
    }
517

  
518
    private Classification classification;
519
    private Classification getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
520
        if (classification == null){
521
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
522
        }
523
        return classification;
524
    }
525

  
526
    private Taxon getAcceptedTaxon(Map<String, String> record, String line, String kewId) {
527
        String statusStr = getValue(record, Kew_Taxonomic_Status);
528
        if ("Synonym".equals(statusStr) || "Artificial Hybrid".equals(statusStr) ){
529
            String accKewId = getValue(record, Kew_Rel_Acc_Name_ID);
530
            UUID accUuid = taxonMap.get(accKewId);
531
            TaxonBase<?> accBase = getTaxonService().find(accUuid);
532
            if (accBase == null){
533
                logger.warn(kewId + "Accepted Taxon does not exist: " + accKewId + line);
534
                return null;
535
            }else if (accBase.isInstanceOf(Synonym.class)){
536
                logger.warn(kewId + "Accepted Taxon is synonym: " + accKewId + line);
537
                return null;
538
            }else{
539
                return CdmBase.deproxy(accBase, Taxon.class);
540
            }
541
        }else{
542
            logger.warn(kewId + "Parent not retrieved" +  line);
543
            return null;
544
        }
545
    }
546

  
547
    private Taxon getParent(Map<String, String> record, TaxonName taxonName, String line, String kewId) {
548
        String statusStr = getValue(record, Kew_Taxonomic_Status);
549
        if ("Unplaced".equals(statusStr)){
550
            return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_UNPLACED_NODE)), Taxon.class);
551
        }else if ("Artificial Hybrid".equals(statusStr)){
552
            return null ; //getTaxonNodeService().find(UUID.fromString(KEW_HYBRIDS_NODE)); hybrids are handled as synonyms now
553
        }else if ("Accepted".equals(statusStr)){
554
            String higherName = getHigherRankName(taxonName);
555
            UUID parentTaxonUuid = higherName == null ? null : taxonMap.get(higherName);
556
            if (parentTaxonUuid != null){
557
                TaxonBase<?> parentBase = getTaxonService().find(parentTaxonUuid);
558
                if (parentBase == null){
559
                    return null;
560
                } else if (parentBase.isInstanceOf(Taxon.class)){
561
                    Taxon parentTaxon = CdmBase.deproxy(parentBase, Taxon.class);
562
                    return parentTaxon;
563
                } else {
564
                    logger.warn(kewId + "Parent is synonym " + line);
565
                    return null;
566
                }
567
            }else{
568
                return CdmBase.deproxy(getTaxonService().find(UUID.fromString(KEW_ACCEPTED_NODE)), Taxon.class);
569
            }
570
        }else if ("Synonym".equals(statusStr)){
571
            //not relevant
572
            return null;
573
        }else{
574
            logger.warn(kewId + "Parent not retrieved" + line);
575
            return null;
576
        }
577
    }
578

  
579
    private String getHigherRankName(TaxonName taxonName) {
580
        if (Rank.SPECIES().equals(taxonName.getRank())){
581
            return taxonName.getGenusOrUninomial();
582
        }else if (taxonName.isInfraSpecific()){
583
            return taxonName.getGenusOrUninomial() + " " + taxonName.getSpecificEpithet();
584
        }
585
        return null;
586
    }
587

  
588
    private void adjustSynonymType(TaxonBase<?> taxonBase, TaxonBase<?> homotypicTaxon, String line) {
589
        adjustSynonymTypeOrdered(taxonBase, homotypicTaxon, line);
590
        adjustSynonymTypeOrdered(homotypicTaxon, taxonBase, line);
591
    }
592

  
593
    private void adjustSynonymTypeOrdered(TaxonBase<?> firstTaxon, TaxonBase<?> secondTaxon, String line) {
594
        if (firstTaxon == null){
595
            logger.warn(line + "first taxon is null for adjust synonym type");
596
        }else if (secondTaxon == null){
597
            logger.warn(line + "second taxon is null for adjust synonym type");
598
        }else if (secondTaxon.isInstanceOf(Synonym.class)){
599
            Synonym syn = CdmBase.deproxy(secondTaxon, Synonym.class);
600
            if (firstTaxon.equals(syn.getAcceptedTaxon())){
601
                syn.setType(SynonymType.HOMOTYPIC_SYNONYM_OF());
602
            }
603
        }
604
    }
605

  
606
    protected TaxonBase<?> getTaxon(Map<String, String> record) {
607
        String kew_name_id = getValue(record, Kew_Name_ID);
608
        UUID taxonUuid = taxonMap.get(kew_name_id);
609
        TaxonBase<?> taxon = getTaxonService().find(taxonUuid);
610
        return taxon;
611
    }
612

  
613
	private boolean checkParsed(TaxonName name, String fullName, String nameStr, String line) {
614
		boolean result = true;
615
	    if (name.isProtectedTitleCache() || name.isProtectedFullTitleCache() || name.isProtectedNameCache()) {
616
			logger.warn(line + "Name could not be parsed: " + fullName);
617
			result = false;
618
		}
619
		Reference nomRef = name.getNomenclaturalReference();
620
		if (nomRef != null && (nomRef.isProtectedTitleCache()
621
		        || nomRef.getInReference() != null && nomRef.getInReference().isProtectedTitleCache())){
622
		    logger.warn(line + "Nom ref could not be parsed: " + fullName);
623
            result = false;
624
		}
625
		if (nameStr != null && !name.getTitleCache().equals(nameStr)){
626
            logger.warn(line + "Name part not parsed correctly: " + name.getTitleCache() + "<-> expected: " + nameStr);
627
            result = false;
628
        }
629
		return result;
630
	}
631

  
632
    private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record) {
633
        if (this.secReference == null){
634
            logger.warn("Load sec ref");
635
            String secUuid = record.get(Sec_Ref_CDM_UUID);
636
            secReference = getReferenceService().load(UUID.fromString(secUuid));
637
            if (this.secReference == null){
638
                logger.warn("Sec ref is null");
639
            }
640
        }
641
        return this.secReference;
642
    }
643

  
644
    private Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
645
        if (this.sourceReference == null){
646
            this.sourceReference = getPersistentReference(state.getConfig().getSourceReference());
647
        }
648
        return this.sourceReference;
649
    }
650

  
651
    private Reference getPersistentReference(Reference reference) {
652
        Reference result = getReferenceService().find(reference.getUuid());
653
        logger.warn("Loaded persistent reference: "+ reference.getUuid());
654
        if (result == null){
655
            logger.warn("Persistent reference is null: " + reference.getUuid());
656
            result = reference;
657
            getReferenceService().saveOrUpdate(result);
658
        }
659
        return result;
660
    }
661

  
662
    private void replaceNameAuthorsAndReferences(SimpleExcelTaxonImportState<CONFIG> state, INonViralName name) {
663
        state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
664
    }
665

  
666

  
667
    @Override
668
    protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
669
    	String noStr = getValue(state.getOriginalRecord(), Kew_Name_ID);
670
        return IdentifiableSource.NewDataImportInstance(noStr, Kew_Name_ID, getSourceCitation(state));
671
    }
672
}
app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/KewExcelTaxonImportConfigurator.java
1
/**
2
* Copyright (C) 2016 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

  
11
import eu.etaxonomy.cdm.common.URI;
12
import eu.etaxonomy.cdm.database.ICdmDataSource;
13
import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
14
import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
15
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
16
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
17
import eu.etaxonomy.cdm.model.reference.Reference;
18

  
19
/**
20
 * Configurator for Kew excel taxon import for Caryophyllaceae.
21
 *
22
 * @author a.mueller
23
 * @since 05.01.2022
24
 */
25
public class KewExcelTaxonImportConfigurator
26
        extends ExcelImportConfiguratorBase{
27

  
28
    private static final long serialVersionUID = -1819917445326422841L;
29

  
30
    private static IInputTransformer defaultTransformer = null;
31
    private Reference secReference;
32

  
33
    public static KewExcelTaxonImportConfigurator NewInstance(URI source, ICdmDataSource destination) {
34
        return new KewExcelTaxonImportConfigurator(source, destination);
35
    }
36

  
37
    private KewExcelTaxonImportConfigurator(URI source, ICdmDataSource destination) {
38
        super(source, destination, defaultTransformer);
39
        setNomenclaturalCode(NomenclaturalCode.ICNAFP);
40
        setSource(source);
41
        setDestination(destination);
42
     }
43

  
44
    @SuppressWarnings({ "unchecked", "rawtypes" })
45
    @Override
46
    public SimpleExcelTaxonImportState getNewState() {
47
        return new SimpleExcelTaxonImportState<>(this);
48
    }
49

  
50
    @SuppressWarnings("unchecked")
51
	@Override
52
    protected void makeIoClassList() {
53
        ioClassList = new Class[]{
54
                KewExcelTaxonImport.class,
55
        };
56
    }
57

  
58
    public Reference getSecReference() {
59
        return secReference;
60
    }
61
    public void setSecReference(Reference secReference) {
62
        this.secReference = secReference;
63
    }
64
}

Also available in: Unified diff