1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.cuba;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.Arrays;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.List;
|
16
|
import java.util.Set;
|
17
|
import java.util.UUID;
|
18
|
import java.util.regex.Matcher;
|
19
|
import java.util.regex.Pattern;
|
20
|
|
21
|
import org.apache.commons.lang.StringUtils;
|
22
|
import org.apache.log4j.Logger;
|
23
|
import org.springframework.stereotype.Component;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
27
|
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
|
28
|
import eu.etaxonomy.cdm.model.agent.Team;
|
29
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
30
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
31
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
32
|
import eu.etaxonomy.cdm.model.common.Language;
|
33
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
34
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
35
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
36
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
37
|
import eu.etaxonomy.cdm.model.name.BotanicalName;
|
38
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
39
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
40
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
41
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
42
|
import eu.etaxonomy.cdm.model.name.Rank;
|
43
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
44
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
45
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
46
|
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
|
47
|
import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
|
48
|
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
|
49
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
50
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
51
|
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
|
52
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
53
|
|
54
|
/**
|
55
|
* @author a.mueller
|
56
|
* @created 05.01.2016
|
57
|
*/
|
58
|
|
59
|
@Component
|
60
|
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
|
61
|
private static final long serialVersionUID = -747486709409732371L;
|
62
|
private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
|
63
|
|
64
|
private static final String HOMONYM_MARKER = ".*\\s+homon.?$";
|
65
|
private static final String DOUBTFUL_MARKER = "^\\?\\s?";
|
66
|
|
67
|
|
68
|
private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
|
69
|
private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
|
70
|
|
71
|
private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
|
72
|
private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
|
73
|
|
74
|
private static List<String> expectedKeys= Arrays.asList(new String[]{"Fam.","(Fam.)","Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
|
75
|
|
76
|
@Override
|
77
|
protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
|
78
|
//we do everything in firstPass here
|
79
|
return;
|
80
|
}
|
81
|
|
82
|
|
83
|
/**
|
84
|
* @param record
|
85
|
* @param state
|
86
|
* @param taxon
|
87
|
*/
|
88
|
private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
|
89
|
try {
|
90
|
NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
|
91
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
92
|
List<PresenceAbsenceTerm> statuss = makeCubanStatus(record, state);
|
93
|
for (PresenceAbsenceTerm status : statuss){
|
94
|
Distribution distribution = Distribution.NewInstance(cuba, status);
|
95
|
desc.addElement(distribution);
|
96
|
}
|
97
|
} catch (UndefinedTransformerMethodException e) {
|
98
|
e.printStackTrace();
|
99
|
}
|
100
|
}
|
101
|
|
102
|
|
103
|
/**
|
104
|
* @param record
|
105
|
* @param state
|
106
|
* @return
|
107
|
* @throws UndefinedTransformerMethodException
|
108
|
*/
|
109
|
private List<PresenceAbsenceTerm> makeCubanStatus(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
|
110
|
boolean isAbsent = false; //TODO
|
111
|
|
112
|
String line = state.getCurrentLine() + ": ";
|
113
|
List<PresenceAbsenceTerm> result = new ArrayList<>();
|
114
|
|
115
|
String endemicStr = getValue(record, "End");
|
116
|
String indigenousStr = getValue(record, "Ind");
|
117
|
String indigenousDoubtStr = getValue(record, "Ind? D");
|
118
|
String naturalisedStr = getValue(record, "Nat");
|
119
|
String dudStr = getValue(record, "Dud P");
|
120
|
String advStr = getValue(record, "Adv");
|
121
|
String cultStr = getValue(record, "Cult C");
|
122
|
|
123
|
if (endemicStr != null){
|
124
|
if(endemicStr.equals("+")){
|
125
|
PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
|
126
|
result.add(endemicState);
|
127
|
}else if(isMinus(endemicStr)){
|
128
|
UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
|
129
|
PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
|
130
|
result.add(endemicState);
|
131
|
}else{
|
132
|
logger.warn(line + "Endemic not recognized: " + endemicStr);
|
133
|
}
|
134
|
}
|
135
|
if (indigenousStr != null){
|
136
|
if(indigenousStr.equals("+")){
|
137
|
UUID indigenousUuid = state.getTransformer().getPresenceTermUuid("Ind.");
|
138
|
PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
|
139
|
result.add(indigenousState);
|
140
|
}else if(isMinus(indigenousStr)){
|
141
|
PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("-Ind.");
|
142
|
result.add(haturalizedState);
|
143
|
}else if(indigenousStr.equals("?")){
|
144
|
UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("?Ind.");
|
145
|
PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
|
146
|
result.add(indigenousDoubtState);
|
147
|
}else{
|
148
|
logger.warn(line + "Indigenous not recognized: " + indigenousStr);
|
149
|
}
|
150
|
}
|
151
|
if(indigenousDoubtStr != null){
|
152
|
if(indigenousDoubtStr.equals("D")){
|
153
|
UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("Ind.?");
|
154
|
PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
|
155
|
result.add(indigenousDoubtState);
|
156
|
}else{
|
157
|
logger.warn(line + "Indigenous doubtful not recognized: " + indigenousDoubtStr);
|
158
|
}
|
159
|
}
|
160
|
if(naturalisedStr != null){
|
161
|
if(naturalisedStr.equals("N")){
|
162
|
PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
|
163
|
result.add(haturalizedState);
|
164
|
}else if(isMinus(naturalisedStr)){
|
165
|
UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
|
166
|
PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
|
167
|
result.add(naturalisedErrorState);
|
168
|
}else if(naturalisedStr.equals("?")){
|
169
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
|
170
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
171
|
result.add(naturalisedDoubtState);
|
172
|
}else{
|
173
|
logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
|
174
|
}
|
175
|
}
|
176
|
if(dudStr != null){
|
177
|
if(dudStr.equals("P")){
|
178
|
UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
|
179
|
PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
|
180
|
result.add(dudState);
|
181
|
}else if(isMinus(dudStr)){
|
182
|
UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
|
183
|
PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
|
184
|
result.add(nonNativeErrorState);
|
185
|
}else if(dudStr.equals("?")){
|
186
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
|
187
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
188
|
result.add(naturalisedDoubtState);
|
189
|
}else{
|
190
|
logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
|
191
|
}
|
192
|
}
|
193
|
if(advStr != null){
|
194
|
if(advStr.equals("A")){
|
195
|
UUID advUuid = state.getTransformer().getPresenceTermUuid("Adv.");
|
196
|
PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
197
|
result.add(advState);
|
198
|
}else if(isMinus(advStr)){
|
199
|
UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
|
200
|
PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
201
|
result.add(advState);
|
202
|
}else{
|
203
|
logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
|
204
|
}
|
205
|
}else if(cultStr != null){
|
206
|
if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
|
207
|
logger.warn("'cultivated' not recognized: " + cultStr);
|
208
|
}else if(cultStr.equals("C")){
|
209
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
|
210
|
result.add(cultivatedState);
|
211
|
}else if(cultStr.equals("?")){
|
212
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
|
213
|
result.add(cultivatedState);
|
214
|
}else if(cultStr.equals("(C)")){
|
215
|
UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
|
216
|
PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
|
217
|
result.add(cultivatedState);
|
218
|
}else if(isMinus(cultStr)){
|
219
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
|
220
|
result.add(cultivatedState);
|
221
|
}else{
|
222
|
logger.warn(line + "'cultivated' not recognized: " + cultStr);
|
223
|
}
|
224
|
}
|
225
|
|
226
|
return result;
|
227
|
}
|
228
|
|
229
|
|
230
|
/**
|
231
|
* @param indigenousStr
|
232
|
* @return
|
233
|
*/
|
234
|
private boolean isMinus(String str) {
|
235
|
return str.equals("-") || str.equals("–");
|
236
|
}
|
237
|
|
238
|
|
239
|
/**
|
240
|
* @param indigenousStr
|
241
|
* @return
|
242
|
*/
|
243
|
private boolean checkPlusMinusDoubt(String str) {
|
244
|
return str.equals("+") || isMinus(str)|| str.equals("?");
|
245
|
}
|
246
|
|
247
|
|
248
|
/**
|
249
|
* @param indigenousStr
|
250
|
* @param indigenousDoubtStr
|
251
|
* @param naturalisedStr
|
252
|
* @param dudStr
|
253
|
* @param advStr
|
254
|
* @param cultStr
|
255
|
*/
|
256
|
private boolean checkAllNull(String ... others) {
|
257
|
for (String other : others){
|
258
|
if (other != null){
|
259
|
return false;
|
260
|
}
|
261
|
}
|
262
|
return true;
|
263
|
}
|
264
|
|
265
|
|
266
|
private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
|
267
|
// String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
|
268
|
// + "(\\((.{6,})\\))?";
|
269
|
private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
|
270
|
+"(\\((.{6,})\\))?";
|
271
|
private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
|
272
|
+"(\\((.{6,})\\))?";
|
273
|
private static final String auctRegExStr = "auct\\."
|
274
|
+"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S))?(\\s+p\\.\\s*p\\.)?";
|
275
|
private static final String missapliedRegExStr = "“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
|
276
|
private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
|
277
|
private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
|
278
|
|
279
|
private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
|
280
|
private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
281
|
private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
|
282
|
private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
|
283
|
|
284
|
/**
|
285
|
* @param record
|
286
|
* @param state
|
287
|
* @param taxon
|
288
|
*/
|
289
|
private void makeSynonyms(HashMap<String, String> record, CubaImportState state) {
|
290
|
// boolean forAccepted = true;
|
291
|
String synonymStr = record.get("Syn.");
|
292
|
String line = state.getCurrentLine() + ": ";
|
293
|
|
294
|
if (synonymStr == null){
|
295
|
//TODO test that this is not a synonym only line
|
296
|
return;
|
297
|
}
|
298
|
synonymStr = synonymStr.trim();
|
299
|
|
300
|
// String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
|
301
|
// String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
|
302
|
|
303
|
// Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
304
|
|
305
|
Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
|
306
|
Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
|
307
|
Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
|
308
|
Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
|
309
|
|
310
|
List<BotanicalName> homonyms = new ArrayList<>();
|
311
|
if (missapliedMatcher.matches()){
|
312
|
String firstPart = missapliedMatcher.group(1);
|
313
|
BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
314
|
|
315
|
String secondPart = missapliedMatcher.group(2);
|
316
|
Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
|
317
|
if (secondPart.startsWith("sensu")){
|
318
|
secondPart = secondPart.substring(5).trim();
|
319
|
if (secondPart.contains(" ")){
|
320
|
logger.warn(line + "Second part contains more than 1 word. Check if this is correct: " + secondPart);
|
321
|
}
|
322
|
Reference<?> sensu = ReferenceFactory.newGeneric();
|
323
|
Team team = Team.NewTitledInstance(secondPart, null);
|
324
|
sensu.setAuthorship(team);
|
325
|
misappliedNameTaxon.setSec(sensu);
|
326
|
}else if (secondPart.matches(auctRegExStr)){
|
327
|
secondPart = secondPart.replace("p. p.", "p.p.");
|
328
|
misappliedNameTaxon.setAppendedPhrase(secondPart);
|
329
|
}else{
|
330
|
logger.warn(line + "Misapplied second part not recognized: " + secondPart);
|
331
|
}
|
332
|
//TODO
|
333
|
Reference<?> relRef = null;
|
334
|
state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
|
335
|
}else if (nomInvalMatcher.matches()){
|
336
|
String firstPart = nomInvalMatcher.group(1);
|
337
|
BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
338
|
NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
|
339
|
name.addStatus(status);
|
340
|
state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
|
341
|
}else if (acceptedMatcher.matches()){
|
342
|
String firstPart = acceptedMatcher.group(1);
|
343
|
String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
|
344
|
handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
|
345
|
}else if(heterotypicMatcher.matches()){
|
346
|
String firstPart = heterotypicMatcher.group(1).trim();
|
347
|
String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
|
348
|
String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
|
349
|
boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
|
350
|
boolean isHomonym = firstPart.trim().matches(HOMONYM_MARKER);
|
351
|
firstPart = normalizeStatus(firstPart);
|
352
|
BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
353
|
if (synName.isProtectedTitleCache()){
|
354
|
logger.warn(line + "heterotypic base synonym could not be parsed correctly:" + firstPart);
|
355
|
}
|
356
|
if (isHomonym){
|
357
|
homonyms.add(synName);
|
358
|
}
|
359
|
SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
|
360
|
sr.getSynonym().setDoubtful(isDoubtful);
|
361
|
handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
|
362
|
}else{
|
363
|
logger.warn(line + "Synonym entry does not match: " + synonymStr);
|
364
|
}
|
365
|
}
|
366
|
|
367
|
|
368
|
|
369
|
/**
|
370
|
* @param synonymStr
|
371
|
* @param state
|
372
|
* @param homonyms
|
373
|
* @param homonymPart
|
374
|
* @param isDoubtful
|
375
|
* @param taxon
|
376
|
* @param homotypicalGroup
|
377
|
*/
|
378
|
private void handleHomotypicGroup(String homotypicStr,
|
379
|
CubaImportState state,
|
380
|
BotanicalName homotypicName,
|
381
|
boolean isHeterotypic,
|
382
|
List<BotanicalName> homonyms,
|
383
|
String homonymPart,
|
384
|
boolean isDoubtful) {
|
385
|
|
386
|
if (homotypicStr == null){
|
387
|
return;
|
388
|
}else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
|
389
|
homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
|
390
|
}
|
391
|
|
392
|
BotanicalName currentBasionym = homotypicName;
|
393
|
String[] splits = homotypicStr.split("\\s*,\\s*");
|
394
|
for (String split : splits){
|
395
|
boolean isHomonym = split.trim().matches(HOMONYM_MARKER);
|
396
|
String singleName = normalizeStatus(split);
|
397
|
BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
398
|
if (newName.isProtectedTitleCache()){
|
399
|
logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
|
400
|
}
|
401
|
if (isHomonym){
|
402
|
homonyms.add(newName);
|
403
|
}
|
404
|
if (isHeterotypic){
|
405
|
SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicName.getHomotypicalGroup(), null, null);
|
406
|
sr.getSynonym().setDoubtful(isDoubtful);
|
407
|
// newName.addBasionym(homotypicName);
|
408
|
currentBasionym = handleBasionym(currentBasionym, newName);
|
409
|
}else{
|
410
|
state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
|
411
|
handleBasionym(currentBasionym, newName);
|
412
|
}
|
413
|
}
|
414
|
makeHomonyms(homonyms, homonymPart, state);
|
415
|
}
|
416
|
|
417
|
|
418
|
/**
|
419
|
* @param homonyms
|
420
|
* @param homonymPart
|
421
|
* @param state
|
422
|
*/
|
423
|
private void makeHomonyms(List<BotanicalName> homonyms, String homonymPart, CubaImportState state) {
|
424
|
String line = state.getCurrentLine() + ": ";
|
425
|
homonymPart = homonymPart == null ? "" : homonymPart.trim();
|
426
|
if (homonyms.isEmpty() && homonymPart.equals("")){
|
427
|
return;
|
428
|
}else if (homonymPart.equals("")){
|
429
|
logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
|
430
|
return;
|
431
|
}
|
432
|
homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
|
433
|
String[] splits = homonymPart.split("\\]\\s*\\[");
|
434
|
if (splits.length != homonyms.size()){
|
435
|
logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
|
436
|
return;
|
437
|
}
|
438
|
int i = 0;
|
439
|
for (String split : splits){
|
440
|
split = split.replaceAll("^non\\s+", "");
|
441
|
BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
442
|
if (newName.isProtectedTitleCache()){
|
443
|
logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
|
444
|
}
|
445
|
newName.addRelationshipToName(homonyms.get(i), NameRelationshipType.LATER_HOMONYM(), null);
|
446
|
i++;
|
447
|
}
|
448
|
}
|
449
|
|
450
|
|
451
|
/**
|
452
|
* @param newName
|
453
|
* @param homotypicName
|
454
|
* @return
|
455
|
*/
|
456
|
private BotanicalName handleBasionym(BotanicalName currentBasionym, BotanicalName name2) {
|
457
|
BotanicalName basionymName = currentBasionym;
|
458
|
BotanicalName newCombination = name2;
|
459
|
//switch if necessary
|
460
|
if (basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
|
461
|
basionymName = name2;
|
462
|
newCombination = currentBasionym;
|
463
|
}
|
464
|
if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
|
465
|
newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
|
466
|
}
|
467
|
return basionymName;
|
468
|
}
|
469
|
|
470
|
|
471
|
/**
|
472
|
* @param combinationAuthorship
|
473
|
* @param basi
|
474
|
* @return
|
475
|
*/
|
476
|
private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
|
477
|
if (author1 == null || author2 == null){
|
478
|
return false;
|
479
|
}else {
|
480
|
return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
|
481
|
}
|
482
|
}
|
483
|
|
484
|
|
485
|
/**
|
486
|
* @param record
|
487
|
* @param state
|
488
|
* @param taxon
|
489
|
*/
|
490
|
private void makeNotes(HashMap<String, String> record, CubaImportState state) {
|
491
|
String notesStr = getValue(record, "(Notas)");
|
492
|
if (notesStr == null){
|
493
|
return;
|
494
|
}else{
|
495
|
Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
|
496
|
//TODO
|
497
|
annotation.setAnnotationType(AnnotationType.EDITORIAL());
|
498
|
state.getCurrentTaxon().addAnnotation(annotation);
|
499
|
}
|
500
|
}
|
501
|
|
502
|
|
503
|
/**
|
504
|
* @param record
|
505
|
* @param state
|
506
|
* @param familyTaxon
|
507
|
* @return
|
508
|
*/
|
509
|
private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
|
510
|
String taxonStr = getValue(record, "Taxón");
|
511
|
if (taxonStr == null){
|
512
|
return isSynonym ? state.getCurrentTaxon() : null;
|
513
|
}
|
514
|
boolean isAbsent = false;
|
515
|
if (taxonStr.startsWith("[") && taxonStr.endsWith("]")){
|
516
|
taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
|
517
|
isAbsent = true;
|
518
|
}
|
519
|
taxonStr = normalizeStatus(taxonStr);
|
520
|
|
521
|
BotanicalName botanicalName = (BotanicalName)nameParser.parseReferencedName(taxonStr, nc, Rank.SPECIES());
|
522
|
Reference<?> sec = getSecReference(state);
|
523
|
Taxon taxon = Taxon.NewInstance(botanicalName, sec);
|
524
|
TaxonNode higherNode;
|
525
|
if (botanicalName.isProtectedTitleCache()){
|
526
|
logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr);
|
527
|
higherNode = familyNode;
|
528
|
}else{
|
529
|
String genusStr = botanicalName.getGenusOrUninomial();
|
530
|
Taxon genus = state.getHigherTaxon(genusStr);
|
531
|
if (genus != null){
|
532
|
higherNode = genus.getTaxonNodes().iterator().next();
|
533
|
}else{
|
534
|
BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
|
535
|
name.setGenusOrUninomial(genusStr);
|
536
|
genus = Taxon.NewInstance(name, sec);
|
537
|
higherNode = familyNode.addChildTaxon(genus, null, null);
|
538
|
state.putHigherTaxon(genusStr, genus);
|
539
|
}
|
540
|
}
|
541
|
|
542
|
higherNode.addChildTaxon(taxon, null, null);
|
543
|
|
544
|
return taxon;
|
545
|
}
|
546
|
|
547
|
/**
|
548
|
* @param state
|
549
|
* @return
|
550
|
*/
|
551
|
private Reference<?> getSecReference(CubaImportState state) {
|
552
|
Reference<?> result = state.getSecReference();
|
553
|
if (result == null){
|
554
|
result = ReferenceFactory.newDatabase();
|
555
|
result.setTitle("Flora of Cuba");
|
556
|
state.setSecReference(result);
|
557
|
}
|
558
|
return result;
|
559
|
}
|
560
|
|
561
|
|
562
|
private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
|
563
|
"nom. rej.","nom. cons. prop.","nom. altern."};
|
564
|
/**
|
565
|
* @param taxonStr
|
566
|
* @return
|
567
|
*/
|
568
|
private String normalizeStatus(String taxonStr) {
|
569
|
if (taxonStr == null){
|
570
|
return null;
|
571
|
}
|
572
|
for (String nomStatusStr : nomStatusStrings){
|
573
|
nomStatusStr = " " + nomStatusStr;
|
574
|
if (taxonStr.endsWith(nomStatusStr)){
|
575
|
taxonStr = taxonStr.replace(nomStatusStr, "," + nomStatusStr);
|
576
|
}
|
577
|
}
|
578
|
taxonStr = taxonStr.replaceAll(HOMONYM_MARKER, "").trim();
|
579
|
taxonStr = taxonStr.replaceAll(DOUBTFUL_MARKER, "").trim();
|
580
|
return taxonStr;
|
581
|
|
582
|
|
583
|
}
|
584
|
|
585
|
|
586
|
/**
|
587
|
* @param record
|
588
|
* @param state
|
589
|
* @return
|
590
|
*/
|
591
|
private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
|
592
|
String familyStr = getValue(record, "Fam.");
|
593
|
if (familyStr == null){
|
594
|
return null;
|
595
|
}
|
596
|
Taxon family = state.getHigherTaxon(familyStr);
|
597
|
TaxonNode familyNode;
|
598
|
if (family != null){
|
599
|
familyNode = family.getTaxonNodes().iterator().next();
|
600
|
}else{
|
601
|
BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
|
602
|
name.setGenusOrUninomial(familyStr);
|
603
|
Reference<?> sec = getSecReference(state);
|
604
|
Taxon taxon = Taxon.NewInstance(name, sec);
|
605
|
ITaxonTreeNode rootNode = getClassification(state);
|
606
|
familyNode = rootNode.addChildTaxon(taxon, sec, null);
|
607
|
state.putHigherTaxon(familyStr, taxon);
|
608
|
}
|
609
|
|
610
|
return familyNode;
|
611
|
}
|
612
|
|
613
|
|
614
|
/**
|
615
|
* @param state
|
616
|
* @return
|
617
|
*/
|
618
|
private TaxonNode getClassification(CubaImportState state) {
|
619
|
Classification classification = state.getClassification();
|
620
|
if (classification == null){
|
621
|
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
622
|
}
|
623
|
TaxonNode rootNode = state.getRootNode();
|
624
|
if (rootNode == null){
|
625
|
rootNode = getTaxonNodeService().find(plantaeUuid);
|
626
|
}
|
627
|
if (rootNode == null){
|
628
|
Reference<?> sec = getSecReference(state);
|
629
|
if (classification == null){
|
630
|
String classificationName = state.getConfig().getClassificationName();
|
631
|
//TODO
|
632
|
Language language = Language.DEFAULT();
|
633
|
classification = Classification.NewInstance(classificationName, sec, language);
|
634
|
state.setClassification(classification);
|
635
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
636
|
classification.getRootNode().setUuid(rootUuid);
|
637
|
}
|
638
|
|
639
|
BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
|
640
|
plantaeName.setGenusOrUninomial("Plantae");
|
641
|
Taxon plantae = Taxon.NewInstance(plantaeName, sec);
|
642
|
TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
|
643
|
plantaeNode.setUuid(plantaeUuid);
|
644
|
state.setRootNode(plantaeNode);
|
645
|
getClassificationService().save(classification);
|
646
|
|
647
|
rootNode = plantaeNode;
|
648
|
}
|
649
|
return rootNode;
|
650
|
}
|
651
|
|
652
|
|
653
|
/**
|
654
|
* @param record
|
655
|
* @param originalKey
|
656
|
* @return
|
657
|
*/
|
658
|
private String getValue(HashMap<String, String> record, String originalKey) {
|
659
|
String value = record.get(originalKey);
|
660
|
if (! StringUtils.isBlank(value)) {
|
661
|
if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
|
662
|
value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
|
663
|
return value;
|
664
|
}else{
|
665
|
return null;
|
666
|
}
|
667
|
}
|
668
|
|
669
|
|
670
|
|
671
|
/**
|
672
|
* Stores taxa records in DB
|
673
|
*/
|
674
|
@Override
|
675
|
protected void firstPass(CubaImportState state) {
|
676
|
boolean isSynonym = false;
|
677
|
|
678
|
int line = state.getCurrentLine();
|
679
|
HashMap<String, String> record = state.getOriginalRecord();
|
680
|
|
681
|
Set<String> keys = record.keySet();
|
682
|
for (String key: keys) {
|
683
|
if (! expectedKeys.contains(key)){
|
684
|
logger.warn("Unexpected Key: " + key);
|
685
|
}
|
686
|
}
|
687
|
|
688
|
if (record.get("Fam.") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
|
689
|
//second header line, don't handle
|
690
|
return;
|
691
|
}
|
692
|
|
693
|
//Fam.
|
694
|
TaxonNode familyTaxon = getFamilyTaxon(record, state);
|
695
|
if (familyTaxon == null){
|
696
|
if (record.get("Taxón") != null){
|
697
|
logger.warn(line + ": Family not recognized but taxon exists: " + record.get("Taxón"));
|
698
|
return;
|
699
|
}else if (record.get("Syn.") == null){
|
700
|
logger.warn(line + ": Family not recognized but also no synonym exists");
|
701
|
return;
|
702
|
}else{
|
703
|
isSynonym = true;
|
704
|
}
|
705
|
}
|
706
|
|
707
|
//(Fam.)
|
708
|
//TODO
|
709
|
|
710
|
//Taxón
|
711
|
Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonym);
|
712
|
if (taxon == null && ! isSynonym){
|
713
|
logger.warn(line + ": taxon could not be created and is null");
|
714
|
return;
|
715
|
}
|
716
|
state.setCurrentTaxon(taxon);
|
717
|
|
718
|
//(Notas)
|
719
|
makeNotes(record, state);
|
720
|
|
721
|
//Syn.
|
722
|
makeSynonyms(record, state);
|
723
|
|
724
|
//End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
|
725
|
makeCubanDistribution(record, state);
|
726
|
|
727
|
|
728
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
729
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
730
|
// "CuE","Gr","Ho","SC","Gu",
|
731
|
// "Esp","Ja","PR","Men","Bah","Cay",
|
732
|
// "AmN","AmC","AmS","VM"});
|
733
|
makeProvincesDistribution(record, state);
|
734
|
|
735
|
return;
|
736
|
}
|
737
|
|
738
|
|
739
|
|
740
|
/**
|
741
|
* @param record
|
742
|
* @param state
|
743
|
* @param taxon
|
744
|
*/
|
745
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
746
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
747
|
// "CuE","Gr","Ho","SC","Gu",
|
748
|
private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
|
749
|
List<String> areaKeys = Arrays.asList(new String[]{
|
750
|
"CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
751
|
"CuC","VC","Ci","SS","CA","Cam","LT",
|
752
|
"CuE","Gr","Ho","SC","Gu",
|
753
|
"Esp","Ja","PR","Men","Bah","Cay",
|
754
|
"AmN","AmC","AmS","VM"});
|
755
|
for (String areaKey : areaKeys){
|
756
|
makeSingleProvinceDistribution(areaKey, record, state);
|
757
|
}
|
758
|
|
759
|
}
|
760
|
|
761
|
|
762
|
/**
|
763
|
* @param areaKey
|
764
|
* @param record
|
765
|
* @param state
|
766
|
* @param taxon
|
767
|
*/
|
768
|
private void makeSingleProvinceDistribution(String areaKey,
|
769
|
HashMap<String, String> record,
|
770
|
CubaImportState state) {
|
771
|
try {
|
772
|
UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
|
773
|
if (areaUuid == null){
|
774
|
logger.warn("Area not recognized: " + areaKey);
|
775
|
return;
|
776
|
}
|
777
|
if (record.get(areaKey)==null){
|
778
|
return; //no status defined
|
779
|
}
|
780
|
|
781
|
NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
|
782
|
if (area == null){
|
783
|
logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
|
784
|
}
|
785
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
786
|
PresenceAbsenceTerm status = makeProvinceStatus(areaKey, record, state);
|
787
|
if (status == null){
|
788
|
logger.warn(state.getCurrentLine() + ": Distribution Status could not be defined: " + record.get(areaKey));
|
789
|
}
|
790
|
Distribution distribution = Distribution.NewInstance(area, status);
|
791
|
desc.addElement(distribution);
|
792
|
} catch (UndefinedTransformerMethodException e) {
|
793
|
e.printStackTrace();
|
794
|
}
|
795
|
|
796
|
}
|
797
|
|
798
|
|
799
|
/**
|
800
|
* @param areaKey
|
801
|
* @param record
|
802
|
* @param state
|
803
|
* @return
|
804
|
* @throws UndefinedTransformerMethodException
|
805
|
*/
|
806
|
private PresenceAbsenceTerm makeProvinceStatus(String areaKey, HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
|
807
|
String statusStr = record.get(areaKey);
|
808
|
if (statusStr == null){
|
809
|
return null;
|
810
|
}
|
811
|
PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
|
812
|
if (status == null){
|
813
|
UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
|
814
|
status = getPresenceTerm(state, statusUuid, null, null, null, false);
|
815
|
}
|
816
|
return status;
|
817
|
}
|
818
|
|
819
|
|
820
|
/**
|
821
|
* Stores parent-child, synonym and common name relationships
|
822
|
*/
|
823
|
@Override
|
824
|
protected void secondPass(CubaImportState state) {
|
825
|
// CyprusRow cyprusRow = state.getCyprusRow();
|
826
|
return;
|
827
|
}
|
828
|
|
829
|
|
830
|
@Override
|
831
|
protected boolean isIgnore(CubaImportState state) {
|
832
|
return ! state.getConfig().isDoTaxa();
|
833
|
}
|
834
|
|
835
|
@Override
|
836
|
protected boolean doCheck(CubaImportState state) {
|
837
|
logger.warn("DoCheck not yet implemented for CubaExcelImport");
|
838
|
return true;
|
839
|
}
|
840
|
|
841
|
}
|