1
|
/**
|
2
|
* Copyright (C) 2016 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.redlist.germanSL;
|
10
|
|
11
|
import java.util.Arrays;
|
12
|
import java.util.HashMap;
|
13
|
import java.util.List;
|
14
|
import java.util.Map;
|
15
|
import java.util.Set;
|
16
|
import java.util.UUID;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.springframework.stereotype.Component;
|
20
|
|
21
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
22
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
23
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
24
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
25
|
import eu.etaxonomy.cdm.model.common.DefinedTerm;
|
26
|
import eu.etaxonomy.cdm.model.common.Language;
|
27
|
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
|
28
|
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
|
29
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
30
|
import eu.etaxonomy.cdm.model.location.Country;
|
31
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
32
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
33
|
import eu.etaxonomy.cdm.model.name.Rank;
|
34
|
import eu.etaxonomy.cdm.model.name.RankClass;
|
35
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
36
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
37
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
38
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
39
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
40
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
41
|
|
42
|
/**
|
43
|
* @author a.mueller
|
44
|
* @since 25.11.2016
|
45
|
*
|
46
|
*/
|
47
|
@Component
|
48
|
public class GermanSLTaxonImport
|
49
|
extends GermanSLImporBase {
|
50
|
|
51
|
private static final long serialVersionUID = 236093186271666895L;
|
52
|
|
53
|
private static final Logger logger = Logger.getLogger(GermanSLTaxonImport.class);
|
54
|
|
55
|
static final String SPECIES_NR = "SPECIES_NR";
|
56
|
private static final String AUTHOR = "AUTHOR";
|
57
|
private static final String ABBREVIAT = "ABBREVIAT";
|
58
|
private static final String SEC = "SECUNDUM";
|
59
|
private static final String RANG = "RANG";
|
60
|
private static final String EXTERNAL_ID = "external_ID";
|
61
|
private static final String GRUPPE = "GRUPPE";
|
62
|
static final String VALID_NR = "VALID_NR";
|
63
|
static final String SYNONYM = "SYNONYM";
|
64
|
private static final String NATIVENAME = "NATIVENAME";
|
65
|
private static final String LETTER_CODE = "LETTERCODE";
|
66
|
static final String AGG = "AGG";
|
67
|
|
68
|
private static final String AGG_NAME = "AGG_NAME";
|
69
|
private static final String VALID_NAME = "VALID_NAME";
|
70
|
|
71
|
private static final String NACHWEIS = "NACHWEIS";
|
72
|
private static final String HYBRID = "HYBRID";
|
73
|
private static final String BEGRUEND = "BEGRUEND";
|
74
|
private static final String EDITSTATUS = "EDITSTATUS";
|
75
|
|
76
|
private static final String UUID_ = "UUID";
|
77
|
|
78
|
|
79
|
public static final String TAXON_NAMESPACE = "1.3.4";
|
80
|
|
81
|
@Override
|
82
|
protected String getWorksheetName() {
|
83
|
return "1.3.4";
|
84
|
}
|
85
|
|
86
|
//dirty I know, but who cares, needed by distribution and commmon name import
|
87
|
protected static final Map<String, TaxonBase<?>> taxonIdMap = new HashMap<>();
|
88
|
|
89
|
|
90
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
91
|
SPECIES_NR,EXTERNAL_ID,ABBREVIAT,
|
92
|
AUTHOR,SEC,SYNONYM,
|
93
|
LETTER_CODE, AGG,
|
94
|
NATIVENAME,VALID_NR,RANG,GRUPPE,
|
95
|
UUID_,
|
96
|
NACHWEIS, HYBRID, BEGRUEND, EDITSTATUS, AGG_NAME, VALID_NAME
|
97
|
});
|
98
|
|
99
|
|
100
|
@Override
|
101
|
protected void firstPass(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
102
|
String line = state.getCurrentLine() + ": ";
|
103
|
HashMap<String, String> record = state.getOriginalRecord();
|
104
|
|
105
|
Set<String> keys = record.keySet();
|
106
|
|
107
|
checkAllKeysExist(line, keys, expectedKeys);
|
108
|
|
109
|
//Name
|
110
|
NameResult nameResult = makeName(line, record, state);
|
111
|
IBotanicalName taxonName = nameResult.name;
|
112
|
|
113
|
//sec
|
114
|
String secRefStr = getValue(record, SEC);
|
115
|
Reference sec = getSecRef(state, secRefStr, line);
|
116
|
|
117
|
|
118
|
//status
|
119
|
String statusStr = getValue(record, SYNONYM);
|
120
|
TaxonBase<?> taxonBase;
|
121
|
if (isAccepted(statusStr, nameResult)){
|
122
|
taxonBase = Taxon.NewInstance(taxonName, sec);
|
123
|
// if (nameResult.proParte){
|
124
|
// logger.warn(line + "accepted taxon can not be pro parte in GermanSL");
|
125
|
// }
|
126
|
}else{
|
127
|
Synonym syn = Synonym.NewInstance(taxonName, sec);
|
128
|
// if (nameResult.proParte){
|
129
|
// syn.setProParte(true);
|
130
|
// }
|
131
|
taxonBase = syn;
|
132
|
}
|
133
|
if (!isBlank(nameResult.sensu)){
|
134
|
taxonBase.setAppendedPhrase(nameResult.sensu);
|
135
|
}
|
136
|
//TODO right order?
|
137
|
taxonBase.setAppendedPhrase(CdmUtils.concat(" ", nameResult.auct, taxonBase.getAppendedPhrase()));
|
138
|
|
139
|
//lettercode
|
140
|
String lettercode = getValue(record, LETTER_CODE);
|
141
|
if (isNotBlank(lettercode)){
|
142
|
UUID idTypeUUID;
|
143
|
try {
|
144
|
idTypeUUID = state.getTransformer().getIdentifierTypeUuid("LETTERCODE");
|
145
|
DefinedTerm idType = getIdentiferType(state, idTypeUUID, "GermanSL lettercode", "GermanSL lettercode", "LETTERCODE", null);
|
146
|
taxonBase.addIdentifier(lettercode, idType);
|
147
|
} catch (UndefinedTransformerMethodException e) {
|
148
|
e.printStackTrace();
|
149
|
}
|
150
|
}
|
151
|
|
152
|
// //annotation
|
153
|
// String annotation = getValue(record, "Anotacion al Taxon");
|
154
|
// if (annotation != null && (!annotation.equals("nom. illeg.") || !annotation.equals("nom. cons."))){
|
155
|
// taxonBase.addAnnotation(Annotation.NewInstance(annotation, AnnotationType.EDITORIAL(), Language.SPANISH_CASTILIAN()));
|
156
|
// }
|
157
|
|
158
|
//UUID
|
159
|
String uuid = getValue(record, UUID_);
|
160
|
//TOOD why sometimes null?
|
161
|
if (uuid != null){
|
162
|
taxonBase.setUuid(UUID.fromString(uuid));
|
163
|
}
|
164
|
|
165
|
|
166
|
//NATIVE NAME
|
167
|
String commonNameStr = getValue(record, NATIVENAME);
|
168
|
//Ann.: synonym common names should be removed!
|
169
|
if (isNotBlank(commonNameStr)){
|
170
|
makeCommonName(commonNameStr, taxonBase, line);
|
171
|
}
|
172
|
|
173
|
|
174
|
//id
|
175
|
String id = getValue(record, SPECIES_NR);
|
176
|
this.addOriginalSource(taxonBase, id, TAXON_NAMESPACE, state.getConfig().getSourceReference());
|
177
|
|
178
|
//save
|
179
|
// getTaxonService().save(taxonBase);
|
180
|
taxonIdMap.put(id, taxonBase);
|
181
|
}
|
182
|
|
183
|
|
184
|
private String removeProparte(String authorStr) {
|
185
|
String regEx = "\\s+p\\.\\s*p\\.$";
|
186
|
if (authorStr == null || !authorStr.matches(".*" + regEx)){
|
187
|
return authorStr;
|
188
|
}else{
|
189
|
return authorStr.replaceAll(regEx, "");
|
190
|
}
|
191
|
}
|
192
|
|
193
|
private String removeSensuLatoStricto(String authorStr) {
|
194
|
String regEx = "\\s+s\\.\\s*(l|str)\\.$";
|
195
|
|
196
|
if (authorStr == null || !authorStr.matches(".*" + regEx)){
|
197
|
return authorStr;
|
198
|
}else{
|
199
|
return authorStr.replaceAll(regEx, "");
|
200
|
}
|
201
|
}
|
202
|
|
203
|
private String removeAuct(String authorStr) {
|
204
|
String regEx = "auct\\.\\??$";
|
205
|
|
206
|
if (authorStr == null || !authorStr.matches(/*".*" + */regEx)){
|
207
|
return authorStr;
|
208
|
}else{
|
209
|
return ""; //authorStr.replaceAll(regEx, "");
|
210
|
}
|
211
|
}
|
212
|
|
213
|
|
214
|
/**
|
215
|
* @param state
|
216
|
* @param secRefStr
|
217
|
* @return
|
218
|
*/
|
219
|
private Reference getSecRef(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String secRefStr, String line) {
|
220
|
Reference result = state.getReference(secRefStr);
|
221
|
if (result == null && secRefStr != null){
|
222
|
result = ReferenceFactory.newGeneric();
|
223
|
result.setTitleCache(secRefStr, true);
|
224
|
|
225
|
// TimePeriod tp = TimePeriodParser.parseString(secRefStr.substring(secRefStr.length()-4));
|
226
|
// String authorStrPart = secRefStr.substring(0, secRefStr.length()-6);
|
227
|
// if (! (authorStrPart + ", " + tp.getYear()).equals(secRefStr)){
|
228
|
// logger.warn(line + "Sec ref could not be parsed: " + secRefStr);
|
229
|
// }else{
|
230
|
// result.setDatePublished(tp);
|
231
|
// }
|
232
|
// TeamOrPersonBase<?> author = state.getAgentBase(authorStrPart);
|
233
|
// if (author == null){
|
234
|
// if (authorStrPart.contains("&")){
|
235
|
// Team team = Team.NewInstance();
|
236
|
// String[] authorSplit = authorStrPart.split("&");
|
237
|
// String[] firstAuthorSplit = authorSplit[0].trim().split(",");
|
238
|
// for (String authorStr : firstAuthorSplit){
|
239
|
// addTeamMember(team, authorStr);
|
240
|
// }
|
241
|
// addTeamMember(team, authorSplit[1]);
|
242
|
// result.setAuthorship(team);
|
243
|
// state.putAgentBase(team.getTitleCache(), team);
|
244
|
// }else if (authorStrPart.equalsIgnoreCase("Tropicos") || authorStrPart.equalsIgnoreCase("The Plant List")
|
245
|
// || authorStrPart.equalsIgnoreCase("APG IV")){
|
246
|
// result.setTitle(authorStrPart);
|
247
|
// }else{
|
248
|
// Person person = Person.NewInstance();
|
249
|
// person.setFamilyName(authorStrPart);
|
250
|
// result.setAuthorship(person);
|
251
|
// state.putAgentBase(person.getTitleCache(), person);
|
252
|
// }
|
253
|
// }else{
|
254
|
// result.setAuthorship(author);
|
255
|
// }
|
256
|
state.putReference(secRefStr, result);
|
257
|
}
|
258
|
|
259
|
return result;
|
260
|
}
|
261
|
|
262
|
|
263
|
|
264
|
/**
|
265
|
* @param record
|
266
|
* @param state
|
267
|
* @return
|
268
|
*/
|
269
|
public NameResult makeName(String line, HashMap<String, String> record, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
270
|
|
271
|
String specieNrStr = getValue(record, SPECIES_NR);
|
272
|
String nameStr = getValue(record, ABBREVIAT);
|
273
|
String authorStr = getValue(record, AUTHOR);
|
274
|
String rankStr = getValue(record, RANG);
|
275
|
|
276
|
NameResult result = new NameResult();
|
277
|
|
278
|
//rank
|
279
|
Rank rank = makeRank(line, state, rankStr);
|
280
|
|
281
|
//name
|
282
|
nameStr = normalizeNameStr(nameStr);
|
283
|
String nameStrWithoutSensu = removeSensuLatoStricto(nameStr);
|
284
|
if (nameStrWithoutSensu.length() < nameStr.length()){
|
285
|
result.sensu = nameStr.substring(nameStrWithoutSensu.length()).trim();
|
286
|
nameStr = nameStrWithoutSensu;
|
287
|
}
|
288
|
|
289
|
//author
|
290
|
//pp
|
291
|
authorStr = normalizeAuthorStr(authorStr);
|
292
|
String authorStrWithoutProParte = removeProparte(authorStr);
|
293
|
result.proParte = authorStrWithoutProParte.length() < authorStr.length();
|
294
|
authorStr = authorStrWithoutProParte;
|
295
|
|
296
|
//auct.
|
297
|
String authorStrWithoutAuct = removeAuct(authorStr);
|
298
|
if (authorStrWithoutAuct.length() < authorStr.length()){
|
299
|
result.auct = authorStr.substring(authorStrWithoutAuct.length()).trim();
|
300
|
}
|
301
|
authorStr = authorStrWithoutAuct;
|
302
|
|
303
|
|
304
|
//name+author
|
305
|
String fullNameStr = CdmUtils.concat(" ", nameStr, authorStr);
|
306
|
|
307
|
IBotanicalName fullName = (IBotanicalName)nameParser.parseReferencedName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
|
308
|
if (fullName.isProtectedTitleCache()){
|
309
|
logger.warn(line + "Name could not be parsed: " + fullNameStr );
|
310
|
}else{
|
311
|
replaceAuthorNamesAndNomRef(state, fullName);
|
312
|
}
|
313
|
// BotanicalName existingName = getExistingName(state, fullName);
|
314
|
|
315
|
//TODO handle existing name
|
316
|
IBotanicalName name = fullName;
|
317
|
this.addOriginalSource(name, specieNrStr, TAXON_NAMESPACE + "_Name", state.getConfig().getSourceReference());
|
318
|
|
319
|
result.name = name;
|
320
|
return result;
|
321
|
}
|
322
|
|
323
|
|
324
|
|
325
|
/**
|
326
|
* @param line
|
327
|
* @param state
|
328
|
* @param rankStr
|
329
|
* @return
|
330
|
*/
|
331
|
private Rank makeRank(String line, SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, String rankStr) {
|
332
|
Rank rank = null;
|
333
|
try {
|
334
|
rank = state.getTransformer().getRankByKey(rankStr);
|
335
|
if (rank == null){
|
336
|
UUID rankUuid = state.getTransformer().getRankUuid(rankStr);
|
337
|
OrderedTermVocabulary<Rank> voc = (OrderedTermVocabulary<Rank>)Rank.SPECIES().getVocabulary();
|
338
|
//TODO
|
339
|
Rank lowerRank = Rank.FORM();
|
340
|
rank = getRank(state, rankUuid, rankStr, rankStr, rankStr, voc, lowerRank, RankClass.Infraspecific);
|
341
|
if (rank == null){
|
342
|
logger.warn(line + "Rank not recognized: " + rankStr);
|
343
|
}
|
344
|
}
|
345
|
} catch (Exception e1) {
|
346
|
logger.warn(line + "Rank not recognized: " + rankStr);
|
347
|
}
|
348
|
return rank;
|
349
|
}
|
350
|
|
351
|
|
352
|
/**
|
353
|
* @param authorStr
|
354
|
* @return
|
355
|
*/
|
356
|
private String normalizeAuthorStr(String authorStr) {
|
357
|
if (isBlank(authorStr)){
|
358
|
return "";
|
359
|
}else{
|
360
|
if (authorStr.equals("-") || authorStr.equals("#")){
|
361
|
authorStr = "";
|
362
|
}
|
363
|
return authorStr;
|
364
|
}
|
365
|
}
|
366
|
|
367
|
private String normalizeNameStr(String nameStr) {
|
368
|
nameStr = nameStr
|
369
|
.replace(" agg.", " aggr.")
|
370
|
.replace(" fo. ", " f. ")
|
371
|
;
|
372
|
return nameStr;
|
373
|
}
|
374
|
|
375
|
|
376
|
boolean nameMapIsInitialized = false;
|
377
|
/**
|
378
|
* @param state
|
379
|
* @param fullName
|
380
|
* @return
|
381
|
*/
|
382
|
private IBotanicalName getExistingName(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state, IBotanicalName fullName) {
|
383
|
initExistinNames(state);
|
384
|
return (IBotanicalName)state.getName(fullName.getTitleCache());
|
385
|
}
|
386
|
|
387
|
/**
|
388
|
* @param state
|
389
|
*/
|
390
|
@SuppressWarnings("rawtypes")
|
391
|
private void initExistinNames(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
392
|
if (!nameMapIsInitialized){
|
393
|
List<String> propertyPaths = Arrays.asList("");
|
394
|
List<TaxonName> existingNames = this.getNameService().list(null, null, null, null, propertyPaths);
|
395
|
for (TaxonName tnb : existingNames){
|
396
|
state.putName(tnb.getTitleCache(), tnb);
|
397
|
}
|
398
|
nameMapIsInitialized = true;
|
399
|
}
|
400
|
}
|
401
|
|
402
|
|
403
|
/**
|
404
|
* @param commmonNameStr
|
405
|
* @param taxonBase
|
406
|
*/
|
407
|
private void makeCommonName(String commmonNameStr, TaxonBase<?> taxonBase, String line) {
|
408
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
409
|
//synonym common names should be neglected
|
410
|
return;
|
411
|
}
|
412
|
Taxon acceptedTaxon = getAccepted(taxonBase);
|
413
|
if (acceptedTaxon != null){
|
414
|
TaxonDescription desc = getTaxonDescription(acceptedTaxon, false, true);
|
415
|
desc.setDefault(true);
|
416
|
CommonTaxonName commonName = CommonTaxonName.NewInstance(commmonNameStr, Language.GERMAN(), Country.GERMANY());
|
417
|
desc.addElement(commonName);
|
418
|
}else{
|
419
|
logger.warn(line + "No accepted taxon available");
|
420
|
}
|
421
|
|
422
|
}
|
423
|
|
424
|
|
425
|
/**
|
426
|
* @param next
|
427
|
* @return
|
428
|
*/
|
429
|
private Taxon getAccepted(TaxonBase<?> taxonBase) {
|
430
|
if (taxonBase.isInstanceOf(Taxon.class)){
|
431
|
return CdmBase.deproxy(taxonBase, Taxon.class);
|
432
|
}else{
|
433
|
Synonym syn = CdmBase.deproxy(taxonBase, Synonym.class);
|
434
|
return syn.getAcceptedTaxon();
|
435
|
}
|
436
|
}
|
437
|
|
438
|
|
439
|
@Override
|
440
|
protected boolean isIgnore(SimpleExcelTaxonImportState<GermanSLImportConfigurator> state) {
|
441
|
return ! state.getConfig().isDoTaxa();
|
442
|
}
|
443
|
}
|