Project

General

Profile

Download (11.2 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.caryo;
10

    
11
import java.util.HashSet;
12
import java.util.Map;
13
import java.util.Set;
14

    
15
import org.apache.logging.log4j.LogManager;
16
import org.apache.logging.log4j.Logger;
17
import org.springframework.stereotype.Component;
18

    
19
import eu.etaxonomy.cdm.common.CdmUtils;
20
import eu.etaxonomy.cdm.common.DoubleResult;
21
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
22
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
23
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
24
import eu.etaxonomy.cdm.model.name.TaxonName;
25
import eu.etaxonomy.cdm.model.reference.Reference;
26
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
27
import eu.etaxonomy.cdm.model.reference.ReferenceType;
28
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
29
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
30

    
31
/**
32
 * @author a.mueller
33
 * @since 02.02.2023
34
 */
35
@Component
36
public class CaryoSileneaeNomRefImport extends CaryoSileneaeImportBase {
37

    
38
    private static final long serialVersionUID = 7227226331297614469L;
39
    private static final Logger logger = LogManager.getLogger();
40

    
41
    private static final String NOMEN_ID = "nomen_ID";
42
    private static final String NAME = "name";
43
    private static final String PUBLICATION = "Publication";
44
    private static final String PUB_TYPE_ED = "PubTypeEd";
45
    private static final String PUB_TYPE_KEW = "PubTypeKew";
46
    private static final String PUB_KEW = "PubKew";
47
    private static final String NIMM_KEW = "NimmKew";
48
    private static final String ORIG_SPELLING = "Original spelling";
49
    private static final String NOM_STATUS = "Nom. Status";
50

    
51
    @SuppressWarnings("unused")
52
    private static final String SECOND_PUBLICATION = "SecondPublication";
53
    @SuppressWarnings("unused")
54
    private static final String IMPORT = "import";
55
    @SuppressWarnings("unused")
56
    private static final String DUPL = "dupl";
57

    
58
    private static final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
59

    
60
    private SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state;
61

    
62
    @Override
63
    protected String getWorksheetName(CaryoSileneaeImportConfigurator config) {
64
        return "NomRef";
65
    }
66

    
67
    @Override
68
    protected void firstPass(SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state) {
69
        int line = state.getCurrentLine();
70
//        if ((line % 500) == 0){
71
//            newTransaction(state);
72
//            System.out.println(line);
73
//        }
74

    
75
        this.state = state;
76
        Map<String, String> record = state.getOriginalRecord();
77

    
78
        Integer nomenId = Integer.valueOf(getValue(record, NOMEN_ID));
79
        String nameStr = getValue(record, NAME);
80
        String origPublication = getValue(record, PUBLICATION);
81
        String pubTypeEd = getValue(record, PUB_TYPE_ED);
82
        String pubTypeKew = getValue(record, PUB_TYPE_KEW);
83
        String pubKew = getValue(record, PUB_KEW);
84

    
85
        String nimmKew = getValue(record, NIMM_KEW);
86
        String origSpelling = getValue(record, ORIG_SPELLING);
87

    
88
        @SuppressWarnings("unused")
89
        String nomStatus = getValue(record, NOM_STATUS);
90

    
91
        String row = String.valueOf(line) + "("+nomenId+"): ";
92

    
93
        origNameMap.remove(nomenId);
94
        TaxonName name = getName(nomenId);
95
        if (name == null) {
96
            return;   //record did not exist
97
            //TODO minor check if it is really a duplicate
98
        }
99

    
100
        boolean isKew = isNotBlank(nimmKew) && "x".equals(nimmKew);
101

    
102
        String publication = isKew ? pubKew : origPublication;
103
        String pubType = isKew ? pubTypeKew : pubTypeEd;
104

    
105
        DoubleResult<String, String> origPubl = origPublicationMap.get(nomenId);
106
        boolean useOrigPubl = false;
107
        if (isBlank(publication) && origPubl != null) {
108
            publication = origPubl.getFirstResult();
109
            useOrigPubl = true;
110
            logger.warn(row + "use original (Nomen.xlsx) publication and/or year");
111
        }
112

    
113
        if ("ined.".equals(publication)) {
114
            publication = null;
115
            NomenclaturalStatusType type = NomenclaturalStatusType.INED();
116
            if (name.hasStatus(type)) {
117
                name.addStatus(type, null, null);
118
            }
119
        }
120

    
121
        ReferenceType refType = getRefType(pubType);
122
        if (refType == null && isNotBlank(publication)) {
123
            logger.warn(row + "reference type not found for: " + publication);
124
        }else if (publication == null) {
125
            if (!name.isAutonym()) {
126
                logger.warn(row + "no publication");
127
            }
128
        }else if (refType == ReferenceType.Article) {
129
            if (!publication.startsWith("in ")) {
130
                publication = " in " + publication;
131
            }else {
132
                publication = " " + publication;
133
            }
134
        }else if (refType == ReferenceType.Book) {
135
            if (publication.startsWith("in ")) {
136
                publication = " " + publication;
137
            }else if (publication.contains(",")) {
138
//                logger.warn(row + "book with ',': " + publication);
139
                String[] split = publication.split(",");
140
                String potentialAuthor = split[0];
141
                if (potentialAuthor.split(" ").length <= 2) {
142
                    boolean noAbbrev = true;
143
                    for(String str : potentialAuthor.split(" ")) {
144
                        if (str.endsWith(".")) {
145
                            noAbbrev = false;
146
                            break;
147
                        }
148
                    }
149
                    if (noAbbrev) {
150
                        refType = ReferenceType.BookSection;
151
                        publication = " in " + publication;
152
                    }else {
153
//                        logger.warn(row + "probably only abbrev title");
154
                        publication = ", " + publication;
155
                    }
156
                } else {
157
//                    logger.warn(row + "probably not booksection");
158
                    publication = ", " + publication;
159
                }
160
            }else {
161
                publication = ", " + publication;
162
            }
163
        }else {
164
            logger.warn(row + "reference type not handled: " + refType);
165
            publication = ", " + publication;
166
        }
167
        String referenceName = CdmUtils.concat("", name.getTitleCache(), publication);
168
        TaxonName parsedName = parser.parseReferencedName(referenceName, NomenclaturalCode.ICNAFP, null);
169
        if (parsedName.isProtectedFullTitleCache() || parsedName.isProtectedTitleCache() ) {
170
            logger.warn(row + "name could not be parsed: " + referenceName);
171
        }else {
172
            Reference ref = parsedName.getNomenclaturalReference();
173
            if (useOrigPubl && origPubl != null && origPubl.getSecondResult() != null) {
174
                if (ref != null) {
175
                    ref.setDatePublished(TimePeriodParser.parseStringVerbatim(origPubl.getSecondResult()));
176
                }else {
177
                    ref = ReferenceFactory.newGeneric();
178
                    ref.setDatePublished(TimePeriodParser.parseStringVerbatim(origPubl.getSecondResult()));
179
                }
180
                logger.warn(row + "set original (Nomen.xlsx) year");
181
            }
182
            name.setNomenclaturalReference(ref);
183
            String microRef = parsedName.getNomenclaturalMicroReference();
184
            name.setNomenclaturalMicroReference(microRef);
185
        }
186

    
187
        //validateName (name);
188
        validateName(name, nameStr, row);
189

    
190
        //deduplicate
191
        dedupliateNameParts(name);
192

    
193
        //orig spelling
194
        if (isNotBlank(origSpelling)) {
195
            TaxonName origName = (TaxonName)parser.parseFullName(origSpelling);
196
            if (origName.isProtectedTitleCache()) {
197
                logger.warn(row + "orig name could not be parsed");
198
            }
199
            if (name.getNomenclaturalSource() == null) {
200
                logger.warn(row + "no nomsource yet");
201
            }
202
            name.getNomenclaturalSource(true).setNameUsedInSource(origName);
203
            origSpellingNames.add(origName);
204
        }
205
    }
206

    
207
    private void validateName(TaxonName name, String nomRefStr, String row) {
208
        nomRefStr = nomRefStr.replace("× ", "×");
209
        nomRefStr = nomRefStr.replace(" unranked ", " [unranked] ");
210
        nomRefStr = nomRefStr.replace(" [infrasp.unranked] ", " [infraspec.] ");
211

    
212
        if (!name.getTitleCache().equals(nomRefStr)) {
213
            TaxonName nomRefName = (TaxonName)parser.parseFullName(nomRefStr, NomenclaturalCode.ICNAFP, null);
214
            if (!nomRefName.getNameCache().equals(name.getNameCache())) {
215
                logger.warn(row+ "nameCache does not match: " + name.getNameCache() + "<->" + nomRefName.getNameCache());
216
                if (!CdmUtils.Nz(name.getAuthorshipCache()).equals(nomRefName.getAuthorshipCache())) {
217
                    logger.warn(row+ "also authorship differs: " + name.getAuthorshipCache() + "<->" + nomRefName.getAuthorshipCache());
218
                }
219
            }else {
220
                logger.warn(row+ "authors/titleCache do not match: " + name.getTitleCache() + "<->" + nomRefStr);
221
            }
222
            if (!CdmUtils.Nz(name.getAuthorshipCache()).equals(nomRefName.getAuthorshipCache())) {
223
                if (isBlank(nomRefName.getAuthorshipCache())) {
224
                    logger.warn(row + "'NomRef' authorship is empty but differs. Kept 'Nomen' authorship");
225
                }else {
226
                    name.setCombinationAuthorship(nomRefName.getCombinationAuthorship());
227
                    name.setExCombinationAuthorship(nomRefName.getExCombinationAuthorship());
228
                    name.setBasionymAuthorship(nomRefName.getBasionymAuthorship());
229
                    name.setExBasionymAuthorship(nomRefName.getExBasionymAuthorship());
230
                }
231
            }
232
        }
233
    }
234

    
235
    private ReferenceType getRefType(String pubType) {
236
        if ("A".equals(pubType)){
237
            return ReferenceType.Article;
238
        }else if ("B".equals(pubType)) {
239
            return ReferenceType.Book;
240
        }
241
        return null;
242
    }
243

    
244
    private TaxonName dedupliateNameParts(TaxonName name) {
245
        if (state.getConfig().isDoDeduplicate()){
246
            state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
247
        }
248
        return name;
249
    }
250

    
251

    
252
    private boolean first = true;
253
    @Override
254
    protected void secondPass(SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state) {
255

    
256
         if (first) {
257
            if (origNameMap.size() > 0) {
258
                logger.warn("There are " +  origNameMap.size() + " unhandled names");
259
                for (Integer key : origNameMap.keySet()) {
260
                    System.out.println(key + ": " + origNameMap.get(key).getTitleCache());
261
                }
262
            }
263

    
264
            Set<TaxonName> commonSet = new HashSet<>(nameMap.values());
265
            commonSet.addAll(origNameMap.values());
266
            commonSet.addAll(origSpellingNames);
267
            try {
268
                getNameService().saveOrUpdate(commonSet);
269
            } catch (Exception e) {
270
                e.printStackTrace();
271
            }
272
            first = false;
273
        }
274
    }
275
}
(9-9/16)