1
|
/**
* Copyright (C) 2016 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
|
9
|
package eu.etaxonomy.cdm.io.redlist.germanSL;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
import java.util.UUID;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.springframework.stereotype.Component;
|
20
|
|
21
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
22
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
23
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
24
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
25
|
import eu.etaxonomy.cdm.model.common.Language;
|
26
|
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
|
27
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
28
|
import eu.etaxonomy.cdm.model.location.Country;
|
29
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
30
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
31
|
import eu.etaxonomy.cdm.model.name.Rank;
|
32
|
import eu.etaxonomy.cdm.model.name.RankClass;
|
33
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
34
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
35
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
36
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
37
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
38
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
39
|
import eu.etaxonomy.cdm.model.term.DefinedTerm;
|
40
|
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
|
41
|
|
42
|
/**
 * Taxon import for GermanSL: creates an accepted taxon or a synonym
 * (including its name) for each record of the worksheet.
 *
 * @author a.mueller
 * @since 25.11.2016
 */
|
47
|
@Component
|
48
|
public class GermanSLTaxonImport
|
49
|
extends GermanSLImporBase {
|
50
|
|
51
|
private static final long serialVersionUID = 236093186271666895L;
|
52
|
|
53
|
private static final Logger logger = Logger.getLogger(GermanSLTaxonImport.class);
|
54
|
|
55
|
static final String SPECIES_NR = "SPECIES_NR";
|
56
|
private static final String AUTHOR = "AUTHOR";
|
57
|
private static final String ABBREVIAT = "ABBREVIAT";
|
58
|
private static final String SEC = "SECUNDUM";
|
59
|
private static final String RANG = "RANG";
|
60
|
private static final String EXTERNAL_ID = "external_ID";
|
61
|
private static final String GRUPPE = "GRUPPE";
|
62
|
static final String VALID_NR = "VALID_NR";
|
63
|
static final String SYNONYM = "SYNONYM";
|
64
|
private static final String NATIVENAME = "NATIVENAME";
|
65
|
private static final String LETTER_CODE = "LETTERCODE";
|
66
|
static final String AGG = "AGG";
|
67
|
|
68
|
private static final String AGG_NAME = "AGG_NAME";
|
69
|
private static final String VALID_NAME = "VALID_NAME";
|
70
|
|
71
|
private static final String NACHWEIS = "NACHWEIS";
|
72
|
private static final String HYBRID = "HYBRID";
|
73
|
private static final String BEGRUEND = "BEGRUEND";
|
74
|
private static final String EDITSTATUS = "EDITSTATUS";
|
75
|
|
76
|
private static final String UUID_ = "UUID";
|
77
|
|
78
|
public static final String TAXON_NAMESPACE = "1.3.4";
|
79
|
|
80
|
@Override
|
81
|
protected String getWorksheetName(GermanSLImportConfigurator config) {
|
82
|
return "1.3.4";
|
83
|
}
|
84
|
|
85
|
//dirty I know, but who cares, needed by distribution and common name import
|
86
|
protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
|
87
|
|
88
|
|
89
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
90
|
SPECIES_NR,EXTERNAL_ID,ABBREVIAT,
|
91
|
AUTHOR,SEC,SYNONYM,
|
92
|
LETTER_CODE, AGG,
|
93
|
NATIVENAME,VALID_NR,RANG,GRUPPE,
|
94
|
UUID_,
|
95
|
NACHWEIS, HYBRID, BEGRUEND, EDITSTATUS, AGG_NAME, VALID_NAME
|
96
|
});
|
97
|
|
98
|
|
99
|
@Override
|
100
|
protected void firstPass(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
101
|
String line = state.getCurrentLine() + ": ";
|
102
|
Map<String, String> record = state.getOriginalRecord();
|
103
|
|
104
|
Set<String> keys = record.keySet();
|
105
|
|
106
|
checkAllKeysExist(line, keys, expectedKeys);
|
107
|
|
108
|
//Name
|
109
|
NameResult nameResult = makeName(line, record, state);
|
110
|
IBotanicalName taxonName = nameResult.name;
|
111
|
|
112
|
//sec
|
113
|
String secRefStr = getValue(record, SEC);
|
114
|
Reference sec = getSecRef(state, secRefStr, line);
|
115
|
|
116
|
//status
|
117
|
String statusStr = getValue(record, SYNONYM);
|
118
|
TaxonBase<?> taxonBase;
|
119
|
if (isAccepted(statusStr, nameResult)){
|
120
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
121
|
// if (nameResult.proParte){
|
122
|
// logger.warn(line + "accepted taxon can not be pro parte in GermanSL");
|
123
|
// }
|
124
|
}else{
|
125
|
Synonym syn = Synonym.NewInstance(taxonName, sec);
|
126
|
// if (nameResult.proParte){
|
127
|
// syn.setProParte(true);
|
128
|
// }
|
129
|
taxonBase = syn;
|
130
|
}
|
131
|
if (!isBlank(nameResult.sensu)){
|
132
|
taxonBase.setAppendedPhrase(nameResult.sensu);
|
133
|
}
|
134
|
//TODO right order?
|
135
|
taxonBase.setAppendedPhrase(CdmUtils.concat(" ", nameResult.auct, taxonBase.getAppendedPhrase()));
|
136
|
|
137
|
//lettercode
|
138
|
String lettercode = getValue(record, LETTER_CODE);
|
139
|
if (isNotBlank(lettercode)){
|
140
|
UUID idTypeUUID;
|
141
|
try {
|
142
|
idTypeUUID = state.getTransformer().getIdentifierTypeUuid("LETTERCODE");
|
143
|
DefinedTerm idType = getIdentiferType(state, idTypeUUID, "GermanSL lettercode", "GermanSL lettercode", "LETTERCODE", null);
|
144
|
taxonBase.addIdentifier(lettercode, idType);
|
145
|
} catch (UndefinedTransformerMethodException e) {
|
146
|
e.printStackTrace();
|
147
|
}
|
148
|
}
|
149
|
|
150
|
// //annotation
|
151
|
// String annotation = getValue(record, "Anotacion al Taxon");
|
152
|
// if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
|
153
|
// taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
|
154
|
// }
|
155
|
|
156
|
//UUID
|
157
|
String uuid = getValue(record, UUID_);
|
158
|
//TOOD why sometimes null?
|
159
|
if (uuid != null){
|
160
|
taxonBase.setUuid(UUID.fromString(uuid));
|
161
|
}
|
162
|
|
163
|
|
164
|
//NATIVE NAME
|
165
|
String commonNameStr = getValue(record, NATIVENAME);
|
166
|
//Ann.: synonym common names should be removed!
|
167
|
if (isNotBlank(commonNameStr)){
|
168
|
makeCommonName(commonNameStr, taxonBase, line);
|
169
|
}
|
170
|
|
171
|
|
172
|
//id
|
173
|
String id = getValue(record, SPECIES_NR);
|
174
|
this.addOriginalSource(taxonBase, id, TAXON_NAMESPACE, getSourceReference(state));
|
175
|
|
176
|
//save
|
177
|
getTaxonService().saveOrUpdate(taxonBase);
|
178
|
saveNameRelations(taxonBase.getName());
|
179
|
taxonIdMap.put(id, taxonBase);
|
180
|
}
|
181
|
|
182
|
|
183
|
|
184
|
private String removeProparte(String authorStr) {
|
185
|
String regEx = "\\s+p\\.\\s*p\\.$";
|
186
|
if (authorStr == null || !authorStr.matches(".*" + regEx)){
|
187
|
return authorStr;
|
188
|
}else{
|
189
|
return authorStr.replaceAll(regEx, "");
|
190
|
}
|
191
|
}
|
192
|
|
193
|
private String removeSensuLatoStricto(String authorStr) {
|
194
|
String regEx = "\\s+s\\.\\s*(l|str)\\.$";
|
195
|
|
196
|
if (authorStr == null || !authorStr.matches(".*" + regEx)){
|
197
|
return authorStr;
|
198
|
}else{
|
199
|
return authorStr.replaceAll(regEx, "");
|
200
|
}
|
201
|
}
|
202
|
|
203
|
private String removeAuct(String authorStr) {
|
204
|
String regEx = "auct\\.\\??$";
|
205
|
|
206
|
if (authorStr == null || !authorStr.matches(/*".*" + */regEx)){
|
207
|
return authorStr;
|
208
|
}else{
|
209
|
return ""; //authorStr.replaceAll(regEx, "");
|
210
|
}
|
211
|
}
|
212
|
|
213
|
|
214
|
/**
|
215
|
* @param state
|
216
|
* @param secRefStr
|
217
|
* @return
|
218
|
*/
|
219
|
private Reference getSecRef(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String secRefStr, String line) {
|
220
|
Reference result = state.getReference(secRefStr);
|
221
|
if (result == null && secRefStr != null){
|
222
|
result = ReferenceFactory.newGeneric();
|
223
|
result.setTitleCache(secRefStr, true);
|
224
|
state.putReference(secRefStr, result);
|
225
|
}
|
226
|
|
227
|
return result;
|
228
|
}
|
229
|
|
230
|
|
231
|
|
232
|
/**
|
233
|
* @param record
|
234
|
* @param state
|
235
|
* @return
|
236
|
*/
|
237
|
public NameResult makeName(String line, Map<String, String> record, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
238
|
|
239
|
String specieNrStr = getValue(record, SPECIES_NR);
|
240
|
String nameStr = getValue(record, ABBREVIAT);
|
241
|
String authorStr = getValue(record, AUTHOR);
|
242
|
String rankStr = getValue(record, RANG);
|
243
|
|
244
|
NameResult result = new NameResult();
|
245
|
|
246
|
//rank
|
247
|
Rank rank = makeRank(line, state, rankStr);
|
248
|
|
249
|
//name
|
250
|
nameStr = normalizeNameStr(nameStr);
|
251
|
String nameStrWithoutSensu = removeSensuLatoStricto(nameStr);
|
252
|
if (nameStrWithoutSensu.length() < nameStr.length()){
|
253
|
result.sensu = nameStr.substring(nameStrWithoutSensu.length()).trim();
|
254
|
nameStr = nameStrWithoutSensu;
|
255
|
}
|
256
|
|
257
|
//author
|
258
|
//pp
|
259
|
authorStr = normalizeAuthorStr(authorStr);
|
260
|
String authorStrWithoutProParte = removeProparte(authorStr);
|
261
|
result.proParte = authorStrWithoutProParte.length() < authorStr.length();
|
262
|
authorStr = authorStrWithoutProParte;
|
263
|
|
264
|
//auct.
|
265
|
String authorStrWithoutAuct = removeAuct(authorStr);
|
266
|
if (authorStrWithoutAuct.length() < authorStr.length()){
|
267
|
result.auct = authorStr.substring(authorStrWithoutAuct.length()).trim();
|
268
|
}
|
269
|
authorStr = authorStrWithoutAuct;
|
270
|
|
271
|
|
272
|
//name+author
|
273
|
String fullNameStr = CdmUtils.concat(" ", nameStr, authorStr);
|
274
|
|
275
|
IBotanicalName fullName = (IBotanicalName)nameParser.parseReferencedName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
|
276
|
if (fullName.isProtectedTitleCache()){
|
277
|
logger.warn(line + "Name could not be parsed: " + fullNameStr );
|
278
|
}else{
|
279
|
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(state, fullName);
|
280
|
// replaceAuthorNamesAndNomRef(state, fullName);
|
281
|
}
|
282
|
// BotanicalName existingName = getExistingName(state, fullName);
|
283
|
|
284
|
//TODO handle existing name
|
285
|
IBotanicalName name = fullName;
|
286
|
this.addOriginalSource(name, specieNrStr, TAXON_NAMESPACE + "_Name", getSourceReference(state));
|
287
|
|
288
|
result.name = name;
|
289
|
return result;
|
290
|
}
|
291
|
|
292
|
private Rank makeRank(String line, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String rankStr) {
|
293
|
Rank rank = null;
|
294
|
try {
|
295
|
rank = state.getTransformer().getRankByKey(rankStr);
|
296
|
if (rank == null){
|
297
|
UUID rankUuid = state.getTransformer().getRankUuid(rankStr);
|
298
|
OrderedTermVocabulary<Rank> voc = (OrderedTermVocabulary<Rank>)Rank.SPECIES().getVocabulary();
|
299
|
//TODO
|
300
|
Rank lowerRank = Rank.FORM();
|
301
|
rank = getRank(state, rankUuid, rankStr, rankStr, rankStr, voc, lowerRank, RankClass.Infraspecific);
|
302
|
if (rank == null){
|
303
|
logger.warn(line + "Rank not recognized: " + rankStr);
|
304
|
}
|
305
|
}
|
306
|
} catch (Exception e1) {
|
307
|
logger.warn(line + "Exception when trying to define rank '" + rankStr + "': " + e1.getMessage());
|
308
|
e1.printStackTrace();
|
309
|
}
|
310
|
return rank;
|
311
|
}
|
312
|
|
313
|
|
314
|
/**
|
315
|
* @param authorStr
|
316
|
* @return
|
317
|
*/
|
318
|
private String normalizeAuthorStr(String authorStr) {
|
319
|
if (isBlank(authorStr)){
|
320
|
return "";
|
321
|
}else{
|
322
|
if (authorStr.equals("-") || authorStr.equals("#")){
|
323
|
authorStr = "";
|
324
|
}
|
325
|
return authorStr;
|
326
|
}
|
327
|
}
|
328
|
|
329
|
private String normalizeNameStr(String nameStr) {
|
330
|
nameStr = nameStr
|
331
|
.replace(" agg.", " aggr.")
|
332
|
.replace(" fo. ", " f. ")
|
333
|
;
|
334
|
return nameStr;
|
335
|
}
|
336
|
|
337
|
|
338
|
boolean nameMapIsInitialized = false;
|
339
|
/**
|
340
|
* @param state
|
341
|
* @param fullName
|
342
|
* @return
|
343
|
*/
|
344
|
private IBotanicalName getExistingName(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, IBotanicalName fullName) {
|
345
|
initExistinNames(state);
|
346
|
return (IBotanicalName)state.getName(fullName.getTitleCache());
|
347
|
}
|
348
|
|
349
|
/**
|
350
|
* @param state
|
351
|
*/
|
352
|
@SuppressWarnings("rawtypes")
|
353
|
private void initExistinNames(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
354
|
if (!nameMapIsInitialized){
|
355
|
List<String> propertyPaths = Arrays.asList("");
|
356
|
List<TaxonName> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
|
357
|
for (TaxonName tnb : existingNames){
|
358
|
state.putName(tnb.getTitleCache(), tnb);
|
359
|
}
|
360
|
nameMapIsInitialized = true;
|
361
|
}
|
362
|
}
|
363
|
|
364
|
|
365
|
/**
|
366
|
* @param commmonNameStr
|
367
|
* @param taxonBase
|
368
|
*/
|
369
|
private void makeCommonName(String commmonNameStr, TaxonBase<?> taxonBase, String line) {
|
370
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
371
|
//synonym common names should be neglected
|
372
|
return;
|
373
|
}
|
374
|
Taxon acceptedTaxon = getAccepted(taxonBase);
|
375
|
if (acceptedTaxon != null){
|
376
|
TaxonDescription desc = getTaxonDescription(acceptedTaxon, false, true);
|
377
|
desc.setDefault(true);
|
378
|
CommonTaxonName commonName = CommonTaxonName.NewInstance(commmonNameStr, Language.GERMAN(), Country.GERMANY());
|
379
|
desc.addElement(commonName);
|
380
|
}else{
|
381
|
logger.warn(line + "No accepted taxon available");
|
382
|
}
|
383
|
|
384
|
}
|
385
|
|
386
|
|
387
|
/**
|
388
|
* @param next
|
389
|
* @return
|
390
|
*/
|
391
|
private Taxon getAccepted(TaxonBase<?> taxonBase) {
|
392
|
if (taxonBase.isInstanceOf(Taxon.class)){
|
393
|
return CdmBase.deproxy(taxonBase, Taxon.class);
|
394
|
}else{
|
395
|
Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
|
396
|
return syn.getAcceptedTaxon();
|
397
|
}
|
398
|
}
|
399
|
|
400
|
|
401
|
@Override
|
402
|
protected boolean isIgnore(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
403
|
return ! state.getConfig().isDoTaxa();
|
404
|
}
|
405
|
}
|