1
|
/**
|
2
|
* Copyright (C) 2020 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.caryo;
|
10
|
|
11
|
import java.util.HashSet;
|
12
|
import java.util.Map;
|
13
|
import java.util.Set;
|
14
|
|
15
|
import org.apache.logging.log4j.LogManager;
|
16
|
import org.apache.logging.log4j.Logger;
|
17
|
import org.springframework.stereotype.Component;
|
18
|
|
19
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
20
|
import eu.etaxonomy.cdm.common.DoubleResult;
|
21
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
22
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
23
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
24
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
25
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
26
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
27
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
28
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
29
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
30
|
|
31
|
/**
|
32
|
* @author a.mueller
|
33
|
* @since 02.02.2023
|
34
|
*/
|
35
|
@Component
|
36
|
public class CaryoSileneaeNomRefImport extends CaryoSileneaeImportBase {
|
37
|
|
38
|
private static final long serialVersionUID = 7227226331297614469L;
|
39
|
private static final Logger logger = LogManager.getLogger();
|
40
|
|
41
|
private static final String NOMEN_ID = "nomen_ID";
|
42
|
private static final String NAME = "name";
|
43
|
private static final String PUBLICATION = "Publication";
|
44
|
private static final String PUB_TYPE_ED = "PubTypeEd";
|
45
|
private static final String PUB_TYPE_KEW = "PubTypeKew";
|
46
|
private static final String PUB_KEW = "PubKew";
|
47
|
private static final String NIMM_KEW = "NimmKew";
|
48
|
private static final String ORIG_SPELLING = "Original spelling";
|
49
|
private static final String NOM_STATUS = "Nom. Status";
|
50
|
|
51
|
@SuppressWarnings("unused")
|
52
|
private static final String SECOND_PUBLICATION = "SecondPublication";
|
53
|
@SuppressWarnings("unused")
|
54
|
private static final String IMPORT = "import";
|
55
|
@SuppressWarnings("unused")
|
56
|
private static final String DUPL = "dupl";
|
57
|
|
58
|
private static final NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
59
|
|
60
|
private SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state;
|
61
|
|
62
|
@Override
|
63
|
protected String getWorksheetName(CaryoSileneaeImportConfigurator config) {
|
64
|
return "NomRef";
|
65
|
}
|
66
|
|
67
|
@Override
|
68
|
protected void firstPass(SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state) {
|
69
|
int line = state.getCurrentLine();
|
70
|
// if ((line % 500) == 0){
|
71
|
// newTransaction(state);
|
72
|
// System.out.println(line);
|
73
|
// }
|
74
|
|
75
|
this.state = state;
|
76
|
Map<String, String> record = state.getOriginalRecord();
|
77
|
|
78
|
Integer nomenId = Integer.valueOf(getValue(record, NOMEN_ID));
|
79
|
String nameStr = getValue(record, NAME);
|
80
|
String origPublication = getValue(record, PUBLICATION);
|
81
|
String pubTypeEd = getValue(record, PUB_TYPE_ED);
|
82
|
String pubTypeKew = getValue(record, PUB_TYPE_KEW);
|
83
|
String pubKew = getValue(record, PUB_KEW);
|
84
|
|
85
|
String nimmKew = getValue(record, NIMM_KEW);
|
86
|
String origSpelling = getValue(record, ORIG_SPELLING);
|
87
|
|
88
|
@SuppressWarnings("unused")
|
89
|
String nomStatus = getValue(record, NOM_STATUS);
|
90
|
|
91
|
String row = String.valueOf(line) + "("+nomenId+"): ";
|
92
|
|
93
|
origNameMap.remove(nomenId);
|
94
|
TaxonName name = getName(nomenId);
|
95
|
if (name == null) {
|
96
|
return; //record did not exist
|
97
|
//TODO minor check if it is really a duplicate
|
98
|
}
|
99
|
|
100
|
boolean isKew = isNotBlank(nimmKew) && "x".equals(nimmKew);
|
101
|
|
102
|
String publication = isKew ? pubKew : origPublication;
|
103
|
String pubType = isKew ? pubTypeKew : pubTypeEd;
|
104
|
|
105
|
DoubleResult<String, String> origPubl = origPublicationMap.get(nomenId);
|
106
|
boolean useOrigPubl = false;
|
107
|
if (isBlank(publication) && origPubl != null) {
|
108
|
publication = origPubl.getFirstResult();
|
109
|
useOrigPubl = true;
|
110
|
logger.warn(row + "use original (Nomen.xlsx) publication and/or year");
|
111
|
}
|
112
|
|
113
|
if ("ined.".equals(publication)) {
|
114
|
publication = null;
|
115
|
NomenclaturalStatusType type = NomenclaturalStatusType.INED();
|
116
|
if (name.hasStatus(type)) {
|
117
|
name.addStatus(type, null, null);
|
118
|
}
|
119
|
}
|
120
|
|
121
|
ReferenceType refType = getRefType(pubType);
|
122
|
if (refType == null && isNotBlank(publication)) {
|
123
|
logger.warn(row + "reference type not found for: " + publication);
|
124
|
}else if (publication == null) {
|
125
|
if (!name.isAutonym()) {
|
126
|
logger.warn(row + "no publication");
|
127
|
}
|
128
|
}else if (refType == ReferenceType.Article) {
|
129
|
if (!publication.startsWith("in ")) {
|
130
|
publication = " in " + publication;
|
131
|
}else {
|
132
|
publication = " " + publication;
|
133
|
}
|
134
|
}else if (refType == ReferenceType.Book) {
|
135
|
if (publication.startsWith("in ")) {
|
136
|
publication = " " + publication;
|
137
|
}else if (publication.contains(",")) {
|
138
|
// logger.warn(row + "book with ',': " + publication);
|
139
|
String[] split = publication.split(",");
|
140
|
String potentialAuthor = split[0];
|
141
|
if (potentialAuthor.split(" ").length <= 2) {
|
142
|
boolean noAbbrev = true;
|
143
|
for(String str : potentialAuthor.split(" ")) {
|
144
|
if (str.endsWith(".")) {
|
145
|
noAbbrev = false;
|
146
|
break;
|
147
|
}
|
148
|
}
|
149
|
if (noAbbrev) {
|
150
|
refType = ReferenceType.BookSection;
|
151
|
publication = " in " + publication;
|
152
|
}else {
|
153
|
// logger.warn(row + "probably only abbrev title");
|
154
|
publication = ", " + publication;
|
155
|
}
|
156
|
} else {
|
157
|
// logger.warn(row + "probably not booksection");
|
158
|
publication = ", " + publication;
|
159
|
}
|
160
|
}else {
|
161
|
publication = ", " + publication;
|
162
|
}
|
163
|
}else {
|
164
|
logger.warn(row + "reference type not handled: " + refType);
|
165
|
publication = ", " + publication;
|
166
|
}
|
167
|
String referenceName = CdmUtils.concat("", name.getTitleCache(), publication);
|
168
|
TaxonName parsedName = parser.parseReferencedName(referenceName, NomenclaturalCode.ICNAFP, null);
|
169
|
if (parsedName.isProtectedFullTitleCache() || parsedName.isProtectedTitleCache() ) {
|
170
|
logger.warn(row + "name could not be parsed: " + referenceName);
|
171
|
}else {
|
172
|
Reference ref = parsedName.getNomenclaturalReference();
|
173
|
if (useOrigPubl && origPubl != null && origPubl.getSecondResult() != null) {
|
174
|
if (ref != null) {
|
175
|
ref.setDatePublished(TimePeriodParser.parseStringVerbatim(origPubl.getSecondResult()));
|
176
|
}else {
|
177
|
ref = ReferenceFactory.newGeneric();
|
178
|
ref.setDatePublished(TimePeriodParser.parseStringVerbatim(origPubl.getSecondResult()));
|
179
|
}
|
180
|
logger.warn(row + "set original (Nomen.xlsx) year");
|
181
|
}
|
182
|
name.setNomenclaturalReference(ref);
|
183
|
String microRef = parsedName.getNomenclaturalMicroReference();
|
184
|
name.setNomenclaturalMicroReference(microRef);
|
185
|
}
|
186
|
|
187
|
//validateName (name);
|
188
|
validateName(name, nameStr, row);
|
189
|
|
190
|
//deduplicate
|
191
|
dedupliateNameParts(name);
|
192
|
|
193
|
//orig spelling
|
194
|
if (isNotBlank(origSpelling)) {
|
195
|
TaxonName origName = (TaxonName)parser.parseFullName(origSpelling);
|
196
|
if (origName.isProtectedTitleCache()) {
|
197
|
logger.warn(row + "orig name could not be parsed");
|
198
|
}
|
199
|
if (name.getNomenclaturalSource() == null) {
|
200
|
logger.warn(row + "no nomsource yet");
|
201
|
}
|
202
|
name.getNomenclaturalSource(true).setNameUsedInSource(origName);
|
203
|
origSpellingNames.add(origName);
|
204
|
}
|
205
|
}
|
206
|
|
207
|
private void validateName(TaxonName name, String nomRefStr, String row) {
|
208
|
nomRefStr = nomRefStr.replace("× ", "×");
|
209
|
nomRefStr = nomRefStr.replace(" unranked ", " [unranked] ");
|
210
|
nomRefStr = nomRefStr.replace(" [infrasp.unranked] ", " [infraspec.] ");
|
211
|
|
212
|
if (!name.getTitleCache().equals(nomRefStr)) {
|
213
|
TaxonName nomRefName = (TaxonName)parser.parseFullName(nomRefStr, NomenclaturalCode.ICNAFP, null);
|
214
|
if (!nomRefName.getNameCache().equals(name.getNameCache())) {
|
215
|
logger.warn(row+ "nameCache does not match: " + name.getNameCache() + "<->" + nomRefName.getNameCache());
|
216
|
if (!CdmUtils.Nz(name.getAuthorshipCache()).equals(nomRefName.getAuthorshipCache())) {
|
217
|
logger.warn(row+ "also authorship differs: " + name.getAuthorshipCache() + "<->" + nomRefName.getAuthorshipCache());
|
218
|
}
|
219
|
}else {
|
220
|
logger.warn(row+ "authors/titleCache do not match: " + name.getTitleCache() + "<->" + nomRefStr);
|
221
|
}
|
222
|
if (!CdmUtils.Nz(name.getAuthorshipCache()).equals(nomRefName.getAuthorshipCache())) {
|
223
|
if (isBlank(nomRefName.getAuthorshipCache())) {
|
224
|
logger.warn(row + "'NomRef' authorship is empty but differs. Kept 'Nomen' authorship");
|
225
|
}else {
|
226
|
name.setCombinationAuthorship(nomRefName.getCombinationAuthorship());
|
227
|
name.setExCombinationAuthorship(nomRefName.getExCombinationAuthorship());
|
228
|
name.setBasionymAuthorship(nomRefName.getBasionymAuthorship());
|
229
|
name.setExBasionymAuthorship(nomRefName.getExBasionymAuthorship());
|
230
|
}
|
231
|
}
|
232
|
}
|
233
|
}
|
234
|
|
235
|
private ReferenceType getRefType(String pubType) {
|
236
|
if ("A".equals(pubType)){
|
237
|
return ReferenceType.Article;
|
238
|
}else if ("B".equals(pubType)) {
|
239
|
return ReferenceType.Book;
|
240
|
}
|
241
|
return null;
|
242
|
}
|
243
|
|
244
|
private TaxonName dedupliateNameParts(TaxonName name) {
|
245
|
if (state.getConfig().isDoDeduplicate()){
|
246
|
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
|
247
|
}
|
248
|
return name;
|
249
|
}
|
250
|
|
251
|
|
252
|
private boolean first = true;
|
253
|
@Override
|
254
|
protected void secondPass(SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state) {
|
255
|
|
256
|
if (first) {
|
257
|
if (origNameMap.size() > 0) {
|
258
|
logger.warn("There are " + origNameMap.size() + " unhandled names");
|
259
|
for (Integer key : origNameMap.keySet()) {
|
260
|
System.out.println(key + ": " + origNameMap.get(key).getTitleCache());
|
261
|
}
|
262
|
}
|
263
|
|
264
|
Set<TaxonName> commonSet = new HashSet<>(nameMap.values());
|
265
|
commonSet.addAll(origNameMap.values());
|
266
|
commonSet.addAll(origSpellingNames);
|
267
|
try {
|
268
|
getNameService().saveOrUpdate(commonSet);
|
269
|
} catch (Exception e) {
|
270
|
e.printStackTrace();
|
271
|
}
|
272
|
first = false;
|
273
|
}
|
274
|
}
|
275
|
}
|