1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.cuba;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.Arrays;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.List;
|
17
|
import java.util.Set;
|
18
|
import java.util.UUID;
|
19
|
import java.util.regex.Matcher;
|
20
|
import java.util.regex.Pattern;
|
21
|
|
22
|
import org.apache.commons.lang.StringUtils;
|
23
|
import org.apache.log4j.Logger;
|
24
|
import org.springframework.stereotype.Component;
|
25
|
|
26
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
27
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
28
|
import eu.etaxonomy.cdm.io.excel.common.ExcelImportBase;
|
29
|
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
|
30
|
import eu.etaxonomy.cdm.model.agent.Person;
|
31
|
import eu.etaxonomy.cdm.model.agent.Team;
|
32
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
33
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
34
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
35
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
36
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
37
|
import eu.etaxonomy.cdm.model.common.Language;
|
38
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
39
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
40
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
41
|
import eu.etaxonomy.cdm.model.description.Feature;
|
42
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
43
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
44
|
import eu.etaxonomy.cdm.model.description.TaxonInteraction;
|
45
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
46
|
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
|
47
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
48
|
import eu.etaxonomy.cdm.model.name.NameRelationship;
|
49
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
50
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
51
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
52
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
53
|
import eu.etaxonomy.cdm.model.name.Rank;
|
54
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
55
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
56
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
57
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
58
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
59
|
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
|
60
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
61
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
62
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
63
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
64
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
|
65
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
|
66
|
import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
|
67
|
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
|
68
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
69
|
|
70
|
/**
|
71
|
* @author a.mueller
|
72
|
* @created 05.01.2016
|
73
|
*/
|
74
|
|
75
|
@Component
|
76
|
public class CubaExcelImport
|
77
|
extends ExcelImportBase<CubaImportState, CubaImportConfigurator, ExcelRowBase> {
|
78
|
private static final long serialVersionUID = -747486709409732371L;
|
79
|
private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
|
80
|
|
81
|
private static final String HOMONYM_MARKER = "\\s+homon.?$";
|
82
|
private static final String DOUBTFUL_MARKER = "^\\?\\s?";
|
83
|
|
84
|
|
85
|
private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
|
86
|
private static UUID spermatophytaUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
|
87
|
private static UUID pteridophytaUuid = UUID.fromString("cd2a7c42-4e2b-4a42-8044-18c8e3ccb76d");
|
88
|
|
89
|
private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
|
90
|
private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
|
91
|
|
92
|
private static List<String> expectedKeys = Arrays.asList(new String[]{
|
93
|
"Fam. default","Fam. FRC","Fam. A&S","Fam. FC","Fam. Sánchez 2017",
|
94
|
"Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
|
95
|
|
96
|
@Override
|
97
|
protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
|
98
|
//we do everything in firstPass here
|
99
|
return;
|
100
|
}
|
101
|
|
102
|
|
103
|
/**
|
104
|
* @param record
|
105
|
* @param state
|
106
|
* @param taxon
|
107
|
*/
|
108
|
private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
|
109
|
try {
|
110
|
NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("Cu"), null, null, null, null, null);
|
111
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
112
|
List<PresenceAbsenceTerm> statuss = makeCubanStatuss(record, state);
|
113
|
for (PresenceAbsenceTerm status : statuss){
|
114
|
Distribution distribution = Distribution.NewInstance(cuba, status);
|
115
|
desc.addElement(distribution);
|
116
|
distribution.addSource(makeDescriptionSource(state));
|
117
|
}
|
118
|
} catch (UndefinedTransformerMethodException e) {
|
119
|
e.printStackTrace();
|
120
|
}
|
121
|
}
|
122
|
|
123
|
|
124
|
/**
|
125
|
* @param record
|
126
|
* @param state
|
127
|
* @return
|
128
|
* @throws UndefinedTransformerMethodException
|
129
|
*/
|
130
|
private List<PresenceAbsenceTerm> makeCubanStatuss(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
|
131
|
PresenceAbsenceTerm highestStatus = null;
|
132
|
|
133
|
String line = state.getCurrentLine() + ": ";
|
134
|
List<PresenceAbsenceTerm> result = new ArrayList<>();
|
135
|
|
136
|
String endemicStr = getValue(record, "End");
|
137
|
String indigenousStr = getValue(record, "Ind");
|
138
|
String indigenousDoubtStr = getValue(record, "Ind? D");
|
139
|
String naturalisedStr = getValue(record, "Nat");
|
140
|
String dudStr = getValue(record, "Dud P");
|
141
|
String advStr = getValue(record, "Adv");
|
142
|
String cultStr = getValue(record, "Cult C");
|
143
|
|
144
|
state.setEndemic(false);
|
145
|
|
146
|
if (endemicStr != null){
|
147
|
if(endemicStr.equals("+")){
|
148
|
PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
|
149
|
result.add(endemicState);
|
150
|
highestStatus = endemicState;
|
151
|
state.setEndemic(true);
|
152
|
}else if(isMinus(endemicStr)){
|
153
|
UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
|
154
|
PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
|
155
|
result.add(endemicState);
|
156
|
checkAbsentHighestState(highestStatus, line, "endemic", false);
|
157
|
}else if(endemicStr.equals("?")){
|
158
|
UUID endemicDoubtfulUuid = state.getTransformer().getPresenceTermUuid("?E");
|
159
|
PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicDoubtfulUuid, null, null, null, false);
|
160
|
result.add(endemicState);
|
161
|
checkAbsentHighestState(highestStatus, line, "endemic", false);
|
162
|
}else{
|
163
|
logger.warn(line + "Endemic not recognized: " + endemicStr);
|
164
|
}
|
165
|
}
|
166
|
if (indigenousStr != null){
|
167
|
if(indigenousStr.equals("+")){
|
168
|
PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("Ind.");
|
169
|
// PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
|
170
|
result.add(indigenousState);
|
171
|
highestStatus = highestStatus != null ? highestStatus : indigenousState;
|
172
|
}else if(isMinus(indigenousStr)){
|
173
|
PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("-Ind.");
|
174
|
result.add(indigenousState);
|
175
|
checkAbsentHighestState(highestStatus, line, "indigenous", false);
|
176
|
}else if(indigenousStr.equals("?")){
|
177
|
PresenceAbsenceTerm indigenousDoubtState = state.getTransformer().getPresenceTermByKey("?Ind.");
|
178
|
// PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
|
179
|
result.add(indigenousDoubtState);
|
180
|
checkAbsentHighestState(highestStatus, line, "indigenous", true);
|
181
|
}else{
|
182
|
logger.warn(line + "Indigenous not recognized: " + indigenousStr);
|
183
|
}
|
184
|
}
|
185
|
if(indigenousDoubtStr != null){
|
186
|
if(indigenousDoubtStr.equals("D")){
|
187
|
PresenceAbsenceTerm doubtIndigenousState = state.getTransformer().getPresenceTermByKey("Ind.?");
|
188
|
// PresenceAbsenceTerm doubtIndigenousState = getPresenceTerm(state, doubtIndigenousUuid, null, null, null, false);
|
189
|
result.add(doubtIndigenousState);
|
190
|
highestStatus = highestStatus != null ? highestStatus : doubtIndigenousState;
|
191
|
}else if(isMinus(indigenousDoubtStr)){
|
192
|
UUID doubtIndigenousErrorUuid = state.getTransformer().getPresenceTermUuid("-Ind.?");
|
193
|
PresenceAbsenceTerm doubtIndigenousErrorState = getPresenceTerm(state, doubtIndigenousErrorUuid, null, null, null, false);
|
194
|
result.add(doubtIndigenousErrorState);
|
195
|
checkAbsentHighestState(highestStatus, line, "doubtfully indigenous", true);
|
196
|
}else{
|
197
|
logger.warn(line + "doubtfully indigenous not recognized: " + indigenousDoubtStr);
|
198
|
}
|
199
|
}
|
200
|
if(naturalisedStr != null){
|
201
|
if(naturalisedStr.equals("N")){
|
202
|
PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
|
203
|
result.add(haturalizedState);
|
204
|
highestStatus = highestStatus != null ? highestStatus : haturalizedState;
|
205
|
}else if(isMinus(naturalisedStr)){
|
206
|
UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
|
207
|
PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
|
208
|
result.add(naturalisedErrorState);
|
209
|
checkAbsentHighestState(highestStatus, line, "naturalized", false);
|
210
|
}else if(naturalisedStr.equals("?")){
|
211
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
|
212
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
213
|
result.add(naturalisedDoubtState);
|
214
|
checkAbsentHighestState(highestStatus, line, "naturalized", true);
|
215
|
}else{
|
216
|
logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
|
217
|
}
|
218
|
}
|
219
|
if(dudStr != null){
|
220
|
if(dudStr.equals("P")){
|
221
|
UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
|
222
|
PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
|
223
|
result.add(dudState);
|
224
|
highestStatus = highestStatus != null ? highestStatus : dudState;
|
225
|
}else if(isMinus(dudStr)){
|
226
|
UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
|
227
|
PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
|
228
|
result.add(nonNativeErrorState);
|
229
|
checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", false);
|
230
|
}else if(dudStr.equals("?")){
|
231
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
|
232
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
233
|
result.add(naturalisedDoubtState);
|
234
|
checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", true);
|
235
|
}else{
|
236
|
logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
|
237
|
}
|
238
|
}
|
239
|
if(advStr != null){
|
240
|
if(advStr.equals("A")){
|
241
|
PresenceAbsenceTerm advState = state.getTransformer().getPresenceTermByKey("Adv.");
|
242
|
// PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
243
|
result.add(advState);
|
244
|
highestStatus = highestStatus != null ? highestStatus : advState;
|
245
|
}else if(isMinus(advStr)){
|
246
|
UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
|
247
|
PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
248
|
result.add(advState);
|
249
|
checkAbsentHighestState(highestStatus, line, "adventive", false);
|
250
|
}else if(advStr.equals("(A)")){
|
251
|
UUID rareCasualUuid = state.getTransformer().getPresenceTermUuid("(A)");
|
252
|
PresenceAbsenceTerm rareCasual = getPresenceTerm(state, rareCasualUuid, null, null, null, false);
|
253
|
result.add(rareCasual);
|
254
|
}else{
|
255
|
logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
|
256
|
}
|
257
|
}else if(cultStr != null){
|
258
|
if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
|
259
|
logger.warn("'cultivated' not recognized: " + cultStr);
|
260
|
}else if(cultStr.equals("C")){
|
261
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
|
262
|
result.add(cultivatedState);
|
263
|
highestStatus = highestStatus != null ? highestStatus : cultivatedState;
|
264
|
}else if(cultStr.equals("?")){
|
265
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
|
266
|
result.add(cultivatedState);
|
267
|
checkAbsentHighestState(highestStatus, line, "cultivated", true);
|
268
|
}else if(cultStr.equals("(C)")){
|
269
|
UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
|
270
|
PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
|
271
|
result.add(cultivatedState);
|
272
|
}else if(isMinus(cultStr)){
|
273
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
|
274
|
result.add(cultivatedState);
|
275
|
checkAbsentHighestState(highestStatus, line, "cultivated", false);
|
276
|
}else{
|
277
|
logger.warn(line + "'cultivated' not recognized: " + cultStr);
|
278
|
}
|
279
|
}
|
280
|
state.setHighestStatusForTaxon(highestStatus);
|
281
|
return result;
|
282
|
}
|
283
|
|
284
|
|
285
|
/**
|
286
|
* @param highestStatus
|
287
|
* @param line
|
288
|
*/
|
289
|
private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus, String line, String stateLabel, boolean doubtful) {
|
290
|
//can be removed, highest status is not used anymore
|
291
|
if (highestStatus == null){
|
292
|
String absentStr = doubtful ? "doubtful" : "absent";
|
293
|
logger.info(line + "Highest cuban state is " + absentStr + " " + stateLabel);
|
294
|
}
|
295
|
|
296
|
}
|
297
|
|
298
|
|
299
|
/**
|
300
|
* @param indigenousStr
|
301
|
* @return
|
302
|
*/
|
303
|
private boolean isMinus(String str) {
|
304
|
return str.equals("-") || str.equals("–") || str.equals("‒");
|
305
|
}
|
306
|
|
307
|
|
308
|
/**
|
309
|
* @param indigenousStr
|
310
|
* @return
|
311
|
*/
|
312
|
private boolean checkPlusMinusDoubt(String str) {
|
313
|
return str.equals("+") || isMinus(str)|| str.equals("?");
|
314
|
}
|
315
|
|
316
|
|
317
|
/**
|
318
|
* @param indigenousStr
|
319
|
* @param indigenousDoubtStr
|
320
|
* @param naturalisedStr
|
321
|
* @param dudStr
|
322
|
* @param advStr
|
323
|
* @param cultStr
|
324
|
*/
|
325
|
private boolean checkAllNull(String ... others) {
|
326
|
for (String other : others){
|
327
|
if (other != null){
|
328
|
return false;
|
329
|
}
|
330
|
}
|
331
|
return true;
|
332
|
}
|
333
|
|
334
|
|
335
|
private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
|
336
|
// String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
|
337
|
// + "(\\((.{6,})\\))?";
|
338
|
private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
|
339
|
+"(\\((.{6,})\\))?";
|
340
|
private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
|
341
|
+"(\\((.{6,})\\))?";
|
342
|
private static final String auctRegExStr = "auct\\."
|
343
|
+"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.(\\sFC-S|\\sA&S)?|\\sWright"
|
344
|
+ "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
|
345
|
+ "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
|
346
|
|
347
|
|
348
|
private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
|
349
|
private static final String sphalmRegExStr = "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
|
350
|
private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
|
351
|
private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
|
352
|
|
353
|
private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
|
354
|
private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
355
|
private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
|
356
|
private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
|
357
|
private static final Pattern sphalmRegEx = Pattern.compile(sphalmRegExStr);
|
358
|
|
359
|
/**
|
360
|
* @param record
|
361
|
* @param state
|
362
|
* @param taxon
|
363
|
*/
|
364
|
private void makeSynonyms(HashMap<String, String> record, CubaImportState state, boolean isFirstSynonym) {
|
365
|
// boolean forAccepted = true;
|
366
|
String synonymStr = record.get("Syn.");
|
367
|
String line = state.getCurrentLine() + ": ";
|
368
|
|
369
|
|
370
|
if (synonymStr == null){
|
371
|
//TODO test that this is not a synonym only line
|
372
|
return;
|
373
|
}
|
374
|
|
375
|
if (state.getCurrentTaxon() == null){
|
376
|
logger.error(line + "Current taxon is null for synonym");
|
377
|
return;
|
378
|
}
|
379
|
|
380
|
|
381
|
synonymStr = synonymStr.trim();
|
382
|
synonymStr = synonymStr.replace("[taxon]", "[infraspec.]");
|
383
|
|
384
|
// String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
|
385
|
// String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
|
386
|
|
387
|
// Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
388
|
|
389
|
|
390
|
Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
|
391
|
Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
|
392
|
Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
|
393
|
Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
|
394
|
Matcher sphalmMatcher = sphalmRegEx.matcher(synonymStr);
|
395
|
|
396
|
List<IBotanicalName> homonyms = new ArrayList<>();
|
397
|
if (missapliedMatcher.matches()){
|
398
|
boolean doubtful = missapliedMatcher.group(1) != null;
|
399
|
String firstPart = missapliedMatcher.group(2);
|
400
|
IBotanicalName name = (IBotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
401
|
name.addSource(makeOriginalSource(state));
|
402
|
|
403
|
String secondPart = missapliedMatcher.group(3);
|
404
|
Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
|
405
|
misappliedNameTaxon.addSource(makeOriginalSource(state));
|
406
|
misappliedNameTaxon.setDoubtful(doubtful);
|
407
|
boolean isProParteMisapplied = false;
|
408
|
if (secondPart.startsWith("sensu")){
|
409
|
secondPart = secondPart.substring(5).trim();
|
410
|
if (secondPart.endsWith(" p.p.")){
|
411
|
secondPart = secondPart.replaceAll(" p.p.$", "");
|
412
|
isProParteMisapplied = true;
|
413
|
}
|
414
|
Reference sensu;
|
415
|
if (isConcreteReference(secondPart)) {
|
416
|
List<Reference> sensuRefs = getConcreteReferences(secondPart, state);
|
417
|
if (sensuRefs.size() > 1){
|
418
|
logger.warn(line + "MAs > 1 not yet handled: " + secondPart);
|
419
|
}
|
420
|
sensu = sensuRefs.get(0);
|
421
|
}else{
|
422
|
if (secondPart.contains(" ")){
|
423
|
logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
|
424
|
}
|
425
|
sensu = ReferenceFactory.newGeneric();
|
426
|
Team team = Team.NewTitledInstance(secondPart, null);
|
427
|
sensu.setAuthorship(team);
|
428
|
}
|
429
|
misappliedNameTaxon.setSec(sensu);
|
430
|
}else if (secondPart.matches(auctRegExStr)){
|
431
|
secondPart = secondPart.replace("p. p.", "p.p.");
|
432
|
misappliedNameTaxon.setAppendedPhrase(secondPart);
|
433
|
}else{
|
434
|
logger.warn(line + "Misapplied second part not recognized: " + secondPart);
|
435
|
}
|
436
|
//TODO
|
437
|
Reference relRef = null;
|
438
|
TaxonRelationship rel = state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
|
439
|
if (isProParteMisapplied){
|
440
|
rel.setType(TaxonRelationshipType.PRO_PARTE_MISAPPLIED_NAME_FOR());
|
441
|
}
|
442
|
}else if (nomInvalMatcher.matches()){
|
443
|
String firstPart = nomInvalMatcher.group(1);
|
444
|
String afterInval = nomInvalMatcher.group(2);
|
445
|
if (StringUtils.isNotBlank(afterInval)){
|
446
|
logger.warn(state.getCurrentLine() + ": After inval to be implemented: " + afterInval);
|
447
|
}
|
448
|
TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
449
|
name.addSource(makeOriginalSource(state));
|
450
|
NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
|
451
|
name.addStatus(status);
|
452
|
Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
|
453
|
syn.addSource(makeOriginalSource(state));
|
454
|
}else if (sphalmMatcher.matches()){
|
455
|
String firstPart = sphalmMatcher.group(1);
|
456
|
String sphalmPart = synonymStr.replace(firstPart, "").replace("“","").replace("”","").trim();
|
457
|
TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
458
|
// NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
|
459
|
// name.addStatus(status);
|
460
|
name.addSource(makeOriginalSource(state));
|
461
|
Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
|
462
|
syn.setAppendedPhrase(sphalmPart);
|
463
|
syn.setSec(null);
|
464
|
syn.addSource(makeOriginalSource(state));
|
465
|
}else if (acceptedMatcher.matches()){
|
466
|
String firstPart = acceptedMatcher.group(1);
|
467
|
String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
|
468
|
List<IBotanicalName> list = handleHomotypicGroup(firstPart, state, state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
|
469
|
checkFirstSynonym(state, list, isFirstSynonym, synonymStr, false);
|
470
|
}else if(heterotypicMatcher.matches()){
|
471
|
String firstPart = heterotypicMatcher.group(1).trim();
|
472
|
String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
|
473
|
String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
|
474
|
boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
|
475
|
firstPart = replaceHomonIlleg(firstPart);
|
476
|
boolean isHomonym = firstPart.matches(".*" + HOMONYM_MARKER);
|
477
|
TaxonName synName = (TaxonName)makeName(state, firstPart);
|
478
|
if (synName.isProtectedTitleCache()){
|
479
|
logger.warn(line + "Heterotypic base synonym could not be parsed correctly: " + firstPart);
|
480
|
}
|
481
|
if (isHomonym){
|
482
|
homonyms.add(synName);
|
483
|
}
|
484
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
|
485
|
syn.setDoubtful(isDoubtful);
|
486
|
syn.addSource(makeOriginalSource(state));
|
487
|
List<IBotanicalName> list = handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
|
488
|
checkFirstSynonym(state, list, isFirstSynonym, synonymStr, true);
|
489
|
|
490
|
}else if (isSpecialHeterotypic(synonymStr)){
|
491
|
TaxonName synName = (TaxonName)makeName(state, synonymStr);
|
492
|
if (synName.isProtectedTitleCache()){
|
493
|
logger.warn(line + "Special heterotypic synonym could not be parsed correctly:" + synonymStr);
|
494
|
}
|
495
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
|
496
|
syn.addSource(makeOriginalSource(state));
|
497
|
}else{
|
498
|
logger.warn(line + "Synonym entry does not match: " + synonymStr);
|
499
|
}
|
500
|
}
|
501
|
|
502
|
/**
|
503
|
* @param secondPart
|
504
|
* @return
|
505
|
*/
|
506
|
private boolean isConcreteReference(String secondPart) {
|
507
|
boolean result = false;
|
508
|
String[] splits = secondPart.split(",");
|
509
|
for (String split : splits){
|
510
|
split = split.trim().replaceAll(" p.p.$", "");
|
511
|
result = split.equals("Griseb. 2") || split.equals("Sauv. 3")
|
512
|
|| split.equals("Grisebach 5") || split.equals("Griseb. 78") ;
|
513
|
if (result == false){
|
514
|
return result;
|
515
|
}
|
516
|
}
|
517
|
return result;
|
518
|
}
|
519
|
|
520
|
private List<Reference> getConcreteReferences(String secondPart, CubaImportState state) {
|
521
|
List<Reference> result = new ArrayList<>();
|
522
|
String[] splits = secondPart.split(",");
|
523
|
for (String split : splits){
|
524
|
split = split.trim().replaceAll(" p.p.$", "");
|
525
|
if (split.equals("Griseb. 2")){
|
526
|
result.add(getSourceByNumber("2", state));
|
527
|
}else if (split.equals("Sauv. 3")){
|
528
|
result.add(getSourceByNumber("3", state));
|
529
|
}else if (split.equals("Grisebach 5")){
|
530
|
result.add(getSourceByNumber("5", state));
|
531
|
}else if (split.equals("Griseb. 78")){
|
532
|
result.add(getSourceByNumber("78", state));
|
533
|
}else{
|
534
|
logger.warn("Concrete reference does not match: " + split);
|
535
|
}
|
536
|
}
|
537
|
return result;
|
538
|
}
|
539
|
|
540
|
|
541
|
/**
|
542
|
* @param state
|
543
|
* @param list
|
544
|
* @param isFirstSynonym
|
545
|
* @param synonymStr
|
546
|
* @param b
|
547
|
*/
|
548
|
private void checkFirstSynonym(CubaImportState state, List<IBotanicalName> list, boolean isFirstSynonym, String synonymStr, boolean isHeterotypicMatcher) {
|
549
|
if (!isFirstSynonym){
|
550
|
return;
|
551
|
}
|
552
|
String line = state.getCurrentLine() + ": ";
|
553
|
IBotanicalName currentName = isHeterotypicMatcher? (IBotanicalName)state.getCurrentTaxon().getName(): list.get(0);
|
554
|
boolean currentHasBasionym = currentName.getBasionymAuthorship() != null;
|
555
|
IBotanicalName firstSynonym = isHeterotypicMatcher ? list.get(0): list.get(1);
|
556
|
// if (list.size() <= 1){
|
557
|
// logger.error(line + "homotypic list size is 1 but shouldn't");
|
558
|
// return;
|
559
|
// }
|
560
|
if (isHeterotypicMatcher && currentHasBasionym){
|
561
|
logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has basionym author but has no homotypic basionym , but : " + synonymStr);
|
562
|
}else if (isHeterotypicMatcher){
|
563
|
//first synonym must not have a basionym author
|
564
|
if (firstSynonym.getBasionymAuthorship() != null){
|
565
|
logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has no basionym but first synonym requires basionym : " + synonymStr);
|
566
|
}
|
567
|
}else{ //isAcceptedMatcher
|
568
|
if (currentHasBasionym){
|
569
|
if (! matchAuthor(currentName.getBasionymAuthorship(), firstSynonym.getCombinationAuthorship())){
|
570
|
logger.info(line + "Current basionym author and first synonym combination author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
|
571
|
}
|
572
|
}else{
|
573
|
if (! matchAuthor(currentName.getCombinationAuthorship(), firstSynonym.getBasionymAuthorship())){
|
574
|
logger.info(line + "Current combination author and first synonym basionym author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
|
575
|
}
|
576
|
}
|
577
|
}
|
578
|
|
579
|
}
|
580
|
|
581
|
|
582
|
/**
|
583
|
* @param synonymStr
|
584
|
* @return
|
585
|
*/
|
586
|
private boolean isSpecialHeterotypic(String synonymStr) {
|
587
|
if (synonymStr == null){
|
588
|
return false;
|
589
|
}else if (synonymStr.equals("Rhynchospora prenleloupiana (‘prenteloupiana’) Boeckeler")){
|
590
|
return true;
|
591
|
}else if (synonymStr.equals("Psidium longipes var. orbiculare (O.Berg) McVaugh")){
|
592
|
return true;
|
593
|
}
|
594
|
return false;
|
595
|
}
|
596
|
|
597
|
|
598
|
/**
|
599
|
* @param areaKey
|
600
|
* @param record
|
601
|
* @param state
|
602
|
* @param taxon
|
603
|
*/
|
604
|
private void makeSingleProvinceDistribution(String areaKey,
|
605
|
HashMap<String, String> record,
|
606
|
CubaImportState state) {
|
607
|
try {
|
608
|
UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
|
609
|
if (areaUuid == null){
|
610
|
logger.warn("Area not recognized: " + areaKey);
|
611
|
return;
|
612
|
}
|
613
|
if (record.get(areaKey)==null){
|
614
|
return; //no status defined
|
615
|
}
|
616
|
|
617
|
NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
|
618
|
if (area == null){
|
619
|
logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
|
620
|
}
|
621
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
622
|
PresenceAbsenceTerm status = makeProvinceStatus(areaKey, record, state);
|
623
|
Reference source = getAreaSourceByNumber(areaKey, record, state);
|
624
|
if (status == null && source == null){
|
625
|
logger.warn(state.getCurrentLine() + ": Province distribution status could not be defined: " + record.get(areaKey));
|
626
|
}else if (status == null){
|
627
|
status = PresenceAbsenceTerm.NATIVE();
|
628
|
}
|
629
|
Distribution distribution = Distribution.NewInstance(area, status);
|
630
|
desc.addElement(distribution);
|
631
|
distribution.addSource(makeDescriptionSource(state));
|
632
|
if (source != null){
|
633
|
distribution.addPrimaryTaxonomicSource(source, null);
|
634
|
}
|
635
|
} catch (UndefinedTransformerMethodException e) {
|
636
|
e.printStackTrace();
|
637
|
}
|
638
|
|
639
|
}
|
640
|
|
641
|
|
642
|
/**
|
643
|
* @param areaKey
|
644
|
* @param record
|
645
|
* @param state
|
646
|
*/
|
647
|
private Reference getAreaSourceByNumber(String areaKey, HashMap<String, String> record, CubaImportState state) {
|
648
|
String statusStr = record.get(areaKey);
|
649
|
if (statusStr == null){
|
650
|
return null;
|
651
|
}else{
|
652
|
statusStr = statusStr.trim();
|
653
|
}
|
654
|
if ("p 78".equals(statusStr)){
|
655
|
statusStr = "78";
|
656
|
}else if ("– 7".equals(statusStr)){
|
657
|
statusStr = "7";
|
658
|
}
|
659
|
return getSourceByNumber(statusStr, state);
|
660
|
}
|
661
|
|
662
|
private Reference getSourceByNumber(String number, CubaImportState state) {
|
663
|
if ("78".equals(number)){
|
664
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta78);
|
665
|
}else if ("1".equals(number)){
|
666
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta2);
|
667
|
}else if ("2".equals(number)){
|
668
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta2);
|
669
|
}else if ("3".equals(number)){
|
670
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta3);
|
671
|
}else if ("5".equals(number)){
|
672
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta5);
|
673
|
}else if ("8".equals(number)){
|
674
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta8_82);
|
675
|
}else if ("7".equals(number)){
|
676
|
return makeReference(state, CubaTransformer.uuidRefPteridophyta7);
|
677
|
}
|
678
|
return null;
|
679
|
}
|
680
|
|
681
|
|
682
|
/**
|
683
|
* @param synonymStr
|
684
|
* @param state
|
685
|
* @param homonyms
|
686
|
* @param homonymPart
|
687
|
* @param isDoubtful
|
688
|
* @param taxon
|
689
|
* @param homotypicalGroup
|
690
|
*/
|
691
|
private List<IBotanicalName> handleHomotypicGroup(String homotypicStrOrig,
|
692
|
CubaImportState state,
|
693
|
IBotanicalName homotypicName,
|
694
|
boolean isHeterotypic,
|
695
|
List<IBotanicalName> homonyms,
|
696
|
String homonymPart,
|
697
|
boolean isDoubtful) {
|
698
|
|
699
|
List<IBotanicalName> homotypicNameList = new ArrayList<>();
|
700
|
homotypicNameList.add(homotypicName);
|
701
|
|
702
|
String homotypicStr = homotypicStrOrig;
|
703
|
if (homotypicStr == null){
|
704
|
return homotypicNameList;
|
705
|
}else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
|
706
|
homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
|
707
|
}
|
708
|
|
709
|
HomotypicalGroup homotypicGroup = homotypicName.getHomotypicalGroup();
|
710
|
String[] splits = homotypicStr.split("\\s*,\\s*");
|
711
|
for (String split : splits){
|
712
|
split = replaceHomonIlleg(split);
|
713
|
boolean isHomonym = split.matches(".*" + HOMONYM_MARKER);
|
714
|
TaxonName newName = (TaxonName)makeName(state, split);
|
715
|
newName.setHomotypicalGroup(homotypicGroup); //not really necessary as this is later set anyway
|
716
|
if (newName.isProtectedTitleCache()){
|
717
|
logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
|
718
|
}
|
719
|
if (isHomonym){
|
720
|
homonyms.add(newName);
|
721
|
}
|
722
|
if (isHeterotypic){
|
723
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(newName, null, null, homotypicGroup);
|
724
|
syn.setDoubtful(isDoubtful);
|
725
|
syn.addSource(makeOriginalSource(state));
|
726
|
// newName.addBasionym(homotypicName);
|
727
|
}else{
|
728
|
state.getCurrentTaxon().addHomotypicSynonymName(newName);
|
729
|
}
|
730
|
handleBasionym(state, homotypicNameList, homonyms, newName);
|
731
|
homotypicNameList.add(newName);
|
732
|
}
|
733
|
makeHomonyms(homonyms, homonymPart, state, homotypicGroup);
|
734
|
return homotypicNameList;
|
735
|
}
|
736
|
|
737
|
|
738
|
/**
|
739
|
* @param split
|
740
|
* @return
|
741
|
*/
|
742
|
private String replaceHomonIlleg(String split) {
|
743
|
String result = split.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
|
744
|
return result;
|
745
|
}
|
746
|
|
747
|
|
748
|
/**
|
749
|
* @param homonyms
|
750
|
* @param homonymPart
|
751
|
* @param state
|
752
|
* @param currentBasionym
|
753
|
*/
|
754
|
private void makeHomonyms(List<IBotanicalName> homonyms, String homonymPartOrig, CubaImportState state,
|
755
|
HomotypicalGroup homotypicGroup) {
|
756
|
String line = state.getCurrentLine() + ": ";
|
757
|
String homonymPart = homonymPartOrig == null ? "" : homonymPartOrig.trim();
|
758
|
if (homonyms.isEmpty() && homonymPart.equals("")){
|
759
|
return;
|
760
|
}else if (homonymPart.equals("")){
|
761
|
logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
|
762
|
return;
|
763
|
}
|
764
|
homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
|
765
|
String[] splits = homonymPart.split("\\]\\s*\\[");
|
766
|
if (splits.length != homonyms.size()){
|
767
|
if(homonyms.size() == 0 && splits.length >= 1){
|
768
|
handleSimpleBlockingNames(splits, state, homotypicGroup);
|
769
|
}else{
|
770
|
logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
|
771
|
}
|
772
|
return;
|
773
|
}
|
774
|
int i = 0;
|
775
|
for (String split : splits){
|
776
|
split = split.replaceAll("^non\\s+", "");
|
777
|
TaxonName newName = (TaxonName)makeName(state, split);
|
778
|
// BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
779
|
if (newName.isProtectedTitleCache()){
|
780
|
logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
|
781
|
}
|
782
|
homonyms.get(i).addRelationshipToName(newName, NameRelationshipType.LATER_HOMONYM(), null);
|
783
|
i++;
|
784
|
}
|
785
|
}
|
786
|
|
787
|
/**
|
788
|
* @param homonymPart
|
789
|
* @param state
|
790
|
* @param homotypicGroup
|
791
|
*/
|
792
|
private void handleSimpleBlockingNames(String[] splitsi,
|
793
|
CubaImportState state,
|
794
|
HomotypicalGroup homotypicGroup) {
|
795
|
List<IBotanicalName> replacementNameCandidates = new ArrayList<>();
|
796
|
for (String spliti : splitsi){
|
797
|
|
798
|
String split = spliti.replaceAll("^non\\s+", "");
|
799
|
IBotanicalName newName = makeName(state, split);
|
800
|
if (newName.isProtectedTitleCache()){
|
801
|
logger.warn(state.getCurrentLine() + ": blocking name could not be parsed: " + split);
|
802
|
}
|
803
|
Set<IBotanicalName> typifiedNames = (Set)homotypicGroup.getTypifiedNames();
|
804
|
Set<IBotanicalName> candidates = new HashSet<>();
|
805
|
for (IBotanicalName name : typifiedNames){
|
806
|
if (name.getGenusOrUninomial() != null && name.getGenusOrUninomial().equals(newName.getGenusOrUninomial())){
|
807
|
if (name.getStatus().isEmpty() || ! name.getStatus().iterator().next().getType().equals(NomenclaturalStatusType.ILLEGITIMATE())){
|
808
|
candidates.add(name);
|
809
|
}
|
810
|
}
|
811
|
}
|
812
|
if (candidates.size() == 1){
|
813
|
TaxonName blockedName = (TaxonName)candidates.iterator().next();
|
814
|
newName.addRelationshipToName(blockedName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
|
815
|
replacementNameCandidates.add(blockedName);
|
816
|
}else{
|
817
|
logger.warn(state.getCurrentLine() + ": Blocking name could not be handled. " + candidates.size() + " candidates.");
|
818
|
}
|
819
|
}
|
820
|
makeReplacedSynonymIfPossible(state, homotypicGroup, replacementNameCandidates);
|
821
|
}
|
822
|
|
823
|
/**
|
824
|
* @param homotypicGroup
|
825
|
* @param replacementNameCandidates
|
826
|
*/
|
827
|
private void makeReplacedSynonymIfPossible(CubaImportState state,
|
828
|
HomotypicalGroup homotypicGroup,
|
829
|
List<IBotanicalName> replacementNameCandidates) {
|
830
|
String line = state.getCurrentLine() +": ";
|
831
|
List<IBotanicalName> replacedCandidates = new ArrayList<>();
|
832
|
for (TaxonName typifiedName : homotypicGroup.getTypifiedNames()){
|
833
|
IBotanicalName candidate = typifiedName;
|
834
|
if (candidate.getBasionymAuthorship() == null){
|
835
|
if (candidate.getStatus().isEmpty()){
|
836
|
if (! replacementNameCandidates.contains(candidate)){
|
837
|
replacedCandidates.add(candidate);
|
838
|
}
|
839
|
}
|
840
|
}
|
841
|
}
|
842
|
if (replacedCandidates.size() == 1){
|
843
|
TaxonName replacedSynonym = (TaxonName)replacedCandidates.iterator().next();
|
844
|
for (IBotanicalName replacementName : replacementNameCandidates){
|
845
|
replacementName.addReplacedSynonym(replacedSynonym, null, null, null);
|
846
|
}
|
847
|
}else if (replacedCandidates.size() < 1){
|
848
|
logger.warn(line + "No replaced synonym candidate found");
|
849
|
}else{
|
850
|
logger.warn(line + "More than 1 ("+replacedCandidates.size()+") replaced synonym candidates found");
|
851
|
}
|
852
|
}
|
853
|
|
854
|
|
855
|
/**
|
856
|
* @param homotypicGroup
|
857
|
* @param newName
|
858
|
*/
|
859
|
private void handleBasionym(CubaImportState state, List<IBotanicalName> homotypicNameList,
|
860
|
List<IBotanicalName> homonyms, IBotanicalName newName) {
|
861
|
for (IBotanicalName existingName : homotypicNameList){
|
862
|
if (existingName != newName){ //should not happen anymore, as new name is added later
|
863
|
boolean onlyIfNotYetExists = true;
|
864
|
createBasionymRelationIfPossible(state, existingName, newName, homonyms.contains(newName), onlyIfNotYetExists);
|
865
|
}
|
866
|
}
|
867
|
}
|
868
|
|
869
|
/**
|
870
|
* @param state
|
871
|
* @param name1
|
872
|
* @param name2
|
873
|
* @return
|
874
|
*/
|
875
|
private void createBasionymRelationIfPossible(CubaImportState state, IBotanicalName name1,
|
876
|
IBotanicalName name2,
|
877
|
boolean name2isHomonym, boolean onlyIfNotYetExists) {
|
878
|
TaxonName basionymName = TaxonName.castAndDeproxy(name1);
|
879
|
TaxonName newCombination = TaxonName.castAndDeproxy(name2);
|
880
|
//exactly one name must have a basionym author
|
881
|
if (name1.getBasionymAuthorship() == null && name2.getBasionymAuthorship() == null
|
882
|
|| name1.getBasionymAuthorship() != null && name2.getBasionymAuthorship() != null){
|
883
|
return;
|
884
|
}
|
885
|
|
886
|
//switch order if necessary
|
887
|
if (! name2isHomonym && basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
|
888
|
basionymName = TaxonName.castAndDeproxy(name2);
|
889
|
newCombination = TaxonName.castAndDeproxy(name1);
|
890
|
}
|
891
|
if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())
|
892
|
&& BasionymRelationCreator.matchLastNamePart(basionymName, newCombination)){
|
893
|
newCombination.addBasionym(basionymName);
|
894
|
}else{
|
895
|
if ( (newCombination.getBasionyms().isEmpty() || ! onlyIfNotYetExists)
|
896
|
&& isLegitimate(basionymName)
|
897
|
&& ! name2isHomonym){
|
898
|
logger.info(state.getCurrentLine() + ": Names are potential basionyms but either author or name part do not match: " + basionymName.getTitleCache() + " <-> " + newCombination.getTitleCache());
|
899
|
}
|
900
|
}
|
901
|
}
|
902
|
|
903
|
/**
|
904
|
* @param basionymName
|
905
|
* @return
|
906
|
*/
|
907
|
private boolean isLegitimate(IBotanicalName basionymName) {
|
908
|
for (NomenclaturalStatus nomStatus : basionymName.getStatus()){
|
909
|
if (nomStatus.getType()!= null && nomStatus.getType().isIllegitimateType()){
|
910
|
return false;
|
911
|
}
|
912
|
}
|
913
|
for (NameRelationship nameRel : basionymName.getNameRelations()){
|
914
|
if (nameRel.getType()!= null && nameRel.getType().isIllegitimateType()){
|
915
|
return false;
|
916
|
}
|
917
|
}
|
918
|
return true;
|
919
|
}
|
920
|
|
921
|
|
922
|
/**
|
923
|
* @param combinationAuthorship
|
924
|
* @param basi
|
925
|
* @return
|
926
|
*/
|
927
|
private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
|
928
|
if (author1 == null || author2 == null){
|
929
|
return false;
|
930
|
}else {
|
931
|
return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
|
932
|
}
|
933
|
}
|
934
|
|
935
|
|
936
|
/**
|
937
|
* @param record
|
938
|
* @param state
|
939
|
* @param taxon
|
940
|
*/
|
941
|
private void makeNotes(HashMap<String, String> record, CubaImportState state) {
|
942
|
String notesStr = getValue(record, "(Notas)");
|
943
|
if (notesStr == null){
|
944
|
return;
|
945
|
}else{
|
946
|
Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
|
947
|
//TODO
|
948
|
annotation.setAnnotationType(AnnotationType.TECHNICAL());
|
949
|
state.getCurrentTaxon().addAnnotation(annotation);
|
950
|
}
|
951
|
}
|
952
|
|
953
|
|
954
|
/**
|
955
|
* @param record
|
956
|
* @param state
|
957
|
* @param familyTaxon
|
958
|
* @return
|
959
|
*/
|
960
|
private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
|
961
|
String taxonStrOrig = getValue(record, "Taxón");
|
962
|
if (taxonStrOrig == null){
|
963
|
return isSynonym ? state.getCurrentTaxon() : null;
|
964
|
}
|
965
|
|
966
|
boolean isAbsent = false;
|
967
|
String taxonStr = taxonStrOrig;
|
968
|
if (taxonStrOrig.startsWith("[") && taxonStrOrig.endsWith("]")){
|
969
|
taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
|
970
|
isAbsent = true;
|
971
|
}
|
972
|
|
973
|
boolean isAuct = false;
|
974
|
if (taxonStr.endsWith("auct.")){
|
975
|
isAuct = true;
|
976
|
taxonStr.replace("auct.", "").trim();
|
977
|
}
|
978
|
state.setTaxonIsAbsent(isAbsent);
|
979
|
IBotanicalName botanicalName = makeName(state, taxonStr);
|
980
|
Reference sec = getSecReference(state);
|
981
|
Taxon taxon = Taxon.NewInstance(botanicalName, sec);
|
982
|
if (isAuct){
|
983
|
taxon.setAppendedPhrase("auct.");
|
984
|
}
|
985
|
|
986
|
TaxonNode higherNode;
|
987
|
if (botanicalName.isProtectedTitleCache()){
|
988
|
logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStrOrig);
|
989
|
higherNode = familyNode;
|
990
|
}else{
|
991
|
String genusStr = botanicalName.getGenusOrUninomial();
|
992
|
Taxon genus = state.getHigherTaxon(genusStr);
|
993
|
if (genus != null){
|
994
|
higherNode = genus.getTaxonNodes().iterator().next();
|
995
|
}else{
|
996
|
IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
|
997
|
name.addSource(makeOriginalSource(state));
|
998
|
name.setGenusOrUninomial(genusStr);
|
999
|
genus = Taxon.NewInstance(name, sec);
|
1000
|
genus.addSource(makeOriginalSource(state));
|
1001
|
higherNode = familyNode.addChildTaxon(genus, null, null);
|
1002
|
getTaxonNodeService().saveOrUpdate(higherNode);
|
1003
|
state.putHigherTaxon(genusStr, genus);
|
1004
|
}
|
1005
|
}
|
1006
|
taxon.addSource(makeOriginalSource(state));
|
1007
|
|
1008
|
TaxonNode newNode = higherNode.addChildTaxon(taxon, null, null);
|
1009
|
getTaxonNodeService().saveOrUpdate(newNode);
|
1010
|
if(isAbsent){
|
1011
|
botanicalName.setTitleCache(taxonStrOrig, true);
|
1012
|
newNode.setExcluded(true);
|
1013
|
}
|
1014
|
|
1015
|
return taxon;
|
1016
|
}
|
1017
|
|
1018
|
private final String orthVarRegExStr = "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
|
1019
|
private final Pattern orthVarRegEx = Pattern.compile(orthVarRegExStr);
|
1020
|
/**
|
1021
|
* @param taxonStr
|
1022
|
* @return
|
1023
|
*/
|
1024
|
private IBotanicalName makeName(CubaImportState state, String nameStrOrig) {
|
1025
|
//normalize
|
1026
|
String nameStr = normalizeStatus(nameStrOrig);
|
1027
|
//orthVar
|
1028
|
Matcher orthVarMatcher = orthVarRegEx.matcher(nameStr);
|
1029
|
String orthVar = null;
|
1030
|
if (orthVarMatcher.matches()) {
|
1031
|
orthVar = orthVarMatcher.group(1);
|
1032
|
nameStr = nameStr.replace(" " + orthVar, "").trim().replaceAll("\\s{2,}", " ");
|
1033
|
orthVar = orthVar.substring(2, orthVar.length() - 2);
|
1034
|
}
|
1035
|
|
1036
|
boolean isNomInval = false;
|
1037
|
if (nameStr.endsWith("nom. inval.")){
|
1038
|
isNomInval = true;
|
1039
|
nameStr = nameStr.replace("nom. inval.", "").trim();
|
1040
|
}
|
1041
|
|
1042
|
TaxonName result = (TaxonName)nameParser.parseReferencedName(nameStr, nc, Rank.SPECIES());
|
1043
|
result.addSource(makeOriginalSource(state));
|
1044
|
if (isNomInval){
|
1045
|
result.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.INVALID()));
|
1046
|
}
|
1047
|
if (orthVar != null){
|
1048
|
TaxonName orthVarName = (TaxonName)result.clone();
|
1049
|
orthVarName.addSource(makeOriginalSource(state));
|
1050
|
//TODO
|
1051
|
Reference citation = null;
|
1052
|
orthVarName.addRelationshipToName(result, NameRelationshipType.ORTHOGRAPHIC_VARIANT(), citation, null, null);
|
1053
|
orthVarName.setSpecificEpithet(orthVar);
|
1054
|
}
|
1055
|
normalizeAuthors(result);
|
1056
|
return result;
|
1057
|
|
1058
|
}
|
1059
|
|
1060
|
/**
|
1061
|
* @param result
|
1062
|
*/
|
1063
|
private void normalizeAuthors(IBotanicalName result) {
|
1064
|
result.setCombinationAuthorship(normalizeAuthor(result.getCombinationAuthorship()));
|
1065
|
result.setExCombinationAuthorship(normalizeAuthor(result.getExCombinationAuthorship()));
|
1066
|
result.setExBasionymAuthorship(normalizeAuthor(result.getExBasionymAuthorship()));
|
1067
|
result.setBasionymAuthorship(normalizeAuthor(result.getBasionymAuthorship()));
|
1068
|
|
1069
|
}
|
1070
|
|
1071
|
|
1072
|
/**
|
1073
|
* @param combinationAuthorship
|
1074
|
* @return
|
1075
|
*/
|
1076
|
private TeamOrPersonBase<?> normalizeAuthor(TeamOrPersonBase<?> author) {
|
1077
|
if (author == null){
|
1078
|
return null;
|
1079
|
}
|
1080
|
TeamOrPersonBase<?> result;
|
1081
|
if (author.isInstanceOf(Person.class)){
|
1082
|
result = normalizePerson(CdmBase.deproxy(author, Person.class));
|
1083
|
}else{
|
1084
|
Team team = CdmBase.deproxy(author, Team.class);
|
1085
|
List<Person> list = team.getTeamMembers();
|
1086
|
for(int i = 0; i < list.size(); i++){
|
1087
|
Person person = list.get(i);
|
1088
|
Person tmpMember = normalizePerson(person);
|
1089
|
list.set(i, tmpMember);
|
1090
|
}
|
1091
|
return team;
|
1092
|
}
|
1093
|
return result;
|
1094
|
}
|
1095
|
|
1096
|
|
1097
|
/**
|
1098
|
* @param deproxy
|
1099
|
* @return
|
1100
|
*/
|
1101
|
private Person normalizePerson(Person person) {
|
1102
|
String title = person.getNomenclaturalTitle();
|
1103
|
title = title.replaceAll("(?<=[a-zA-Z])\\.(?=[a-zA-Z])", ". ");
|
1104
|
person.setNomenclaturalTitle(title);
|
1105
|
boolean isFilius = title.endsWith(" f.");
|
1106
|
if (isFilius){
|
1107
|
title.replace(" f.", "");
|
1108
|
}
|
1109
|
|
1110
|
String[] splits = title.split("\\s+");
|
1111
|
int nNotFirstName = isFilius ? 2 : 1;
|
1112
|
person.setLastname(splits[splits.length - nNotFirstName] + (isFilius? " f." : ""));
|
1113
|
person.setFirstname(CdmUtils.concat(" ", Arrays.copyOfRange(splits, 0, splits.length-nNotFirstName)));
|
1114
|
return person;
|
1115
|
}
|
1116
|
|
1117
|
|
1118
|
/**
|
1119
|
* @param state
|
1120
|
* @return
|
1121
|
*/
|
1122
|
private Reference getSecReference(CubaImportState state) {
|
1123
|
Reference result = state.getSecReference();
|
1124
|
if (result == null){
|
1125
|
result = ReferenceFactory.newDatabase();
|
1126
|
result.setTitle("Flora of Cuba");
|
1127
|
state.setSecReference(result);
|
1128
|
}
|
1129
|
return result;
|
1130
|
}
|
1131
|
|
1132
|
|
1133
|
private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
|
1134
|
"nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
|
1135
|
/**
|
1136
|
* @param taxonStr
|
1137
|
* @return
|
1138
|
*/
|
1139
|
private String normalizeStatus(String nameStr) {
|
1140
|
if (nameStr == null){
|
1141
|
return null;
|
1142
|
}
|
1143
|
String result = nameStr.replaceAll(HOMONYM_MARKER, "").trim();
|
1144
|
for (String nomStatusStr : nomStatusStrings){
|
1145
|
nomStatusStr = " " + nomStatusStr;
|
1146
|
if (result.endsWith(nomStatusStr)){
|
1147
|
result = result.replace(nomStatusStr, "," + nomStatusStr);
|
1148
|
}
|
1149
|
}
|
1150
|
result = result.replaceAll(DOUBTFUL_MARKER, "").trim();
|
1151
|
result = result.replace("[taxon]", "[infraspec.]");
|
1152
|
return result;
|
1153
|
|
1154
|
|
1155
|
}
|
1156
|
|
1157
|
|
1158
|
/**
|
1159
|
* @param record
|
1160
|
* @param state
|
1161
|
* @return
|
1162
|
*/
|
1163
|
private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
|
1164
|
String familyStr = getValue(record, "Fam. default");
|
1165
|
if (familyStr == null){
|
1166
|
return null;
|
1167
|
}
|
1168
|
familyStr = familyStr.trim();
|
1169
|
String alternativeFamilyStr = null;
|
1170
|
if (familyStr.contains("/")){
|
1171
|
String[] splits = familyStr.split("/");
|
1172
|
if (splits.length > 2){
|
1173
|
logger.warn(state.getCurrentLine() +": " + "More than 1 alternative name:" + familyStr);
|
1174
|
}
|
1175
|
familyStr = splits[0].trim();
|
1176
|
alternativeFamilyStr = splits[1].trim();
|
1177
|
}
|
1178
|
|
1179
|
Taxon family = state.getHigherTaxon(familyStr);
|
1180
|
TaxonNode familyNode;
|
1181
|
if (family != null){
|
1182
|
familyNode = family.getTaxonNodes().iterator().next();
|
1183
|
}else{
|
1184
|
TaxonName name = (TaxonName)makeFamilyName(state, familyStr);
|
1185
|
Reference sec = getSecReference(state);
|
1186
|
family = Taxon.NewInstance(name, sec);
|
1187
|
ITaxonTreeNode rootNode = getClassification(state);
|
1188
|
familyNode = rootNode.addChildTaxon(family, sec, null);
|
1189
|
this.getTaxonNodeService().saveOrUpdate(familyNode);
|
1190
|
state.putHigherTaxon(familyStr, family);
|
1191
|
|
1192
|
}
|
1193
|
|
1194
|
if (isNotBlank(alternativeFamilyStr)){
|
1195
|
NameRelationshipType type = NameRelationshipType.ALTERNATIVE_NAME();
|
1196
|
TaxonName alternativeName = (TaxonName)makeFamilyName(state, alternativeFamilyStr);
|
1197
|
IBotanicalName familyName = family.getName();
|
1198
|
boolean hasRelation = false;
|
1199
|
for (NameRelationship nameRel : familyName.getRelationsToThisName()){
|
1200
|
if (nameRel.getType().equals(type)){
|
1201
|
if (nameRel.getFromName().equals(alternativeName)){
|
1202
|
hasRelation = true;
|
1203
|
}
|
1204
|
}
|
1205
|
}
|
1206
|
if (!hasRelation){
|
1207
|
familyName.addRelationshipFromName(alternativeName, type, null);
|
1208
|
}
|
1209
|
|
1210
|
}
|
1211
|
|
1212
|
return familyNode;
|
1213
|
}
|
1214
|
|
1215
|
|
1216
|
/**
|
1217
|
* @param state
|
1218
|
* @param taxon
|
1219
|
*/
|
1220
|
private void validateTaxonIsAbsent(CubaImportState state, Taxon taxon) {
|
1221
|
if (!state.isTaxonIsAbsent()){
|
1222
|
return;
|
1223
|
}
|
1224
|
|
1225
|
for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
|
1226
|
if (el instanceof Distribution){
|
1227
|
Distribution dist = (Distribution)el;
|
1228
|
NamedArea area = dist.getArea();
|
1229
|
if (isCubanArea(area)){
|
1230
|
PresenceAbsenceTerm status = dist.getStatus();
|
1231
|
if (status != null && !status.isAbsenceTerm()){
|
1232
|
if (!isDoubtfulTerm(status)){
|
1233
|
String name = taxon.getName().getTitleCache();
|
1234
|
logger.error(state.getCurrentLine() +": Taxon ("+name+")is absent'[]' but has presence distribution: " + status.getTitleCache());
|
1235
|
return;
|
1236
|
}
|
1237
|
}
|
1238
|
}
|
1239
|
}
|
1240
|
}
|
1241
|
}
|
1242
|
|
1243
|
/**
|
1244
|
* @param state
|
1245
|
* @param taxon
|
1246
|
*/
|
1247
|
private void validateEndemic(CubaImportState state, Taxon taxon) {
|
1248
|
|
1249
|
boolean hasExternalPresence = false;
|
1250
|
for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
|
1251
|
if (el instanceof Distribution){
|
1252
|
Distribution dist = (Distribution)el;
|
1253
|
NamedArea area = dist.getArea();
|
1254
|
if (!isCubanArea(area)){
|
1255
|
PresenceAbsenceTerm status = dist.getStatus();
|
1256
|
if (status != null && !status.isAbsenceTerm()){
|
1257
|
if (!isDoubtfulTerm(status)){
|
1258
|
hasExternalPresence = true;
|
1259
|
if (state.isEndemic()){
|
1260
|
String name = taxon.getName().getTitleCache();
|
1261
|
logger.warn(state.getCurrentLine() +": Taxon ("+name+")is endemic but has non-cuban distribution: " + area.getIdInVocabulary() + "-" + status.getIdInVocabulary());
|
1262
|
return;
|
1263
|
}
|
1264
|
}
|
1265
|
}
|
1266
|
}
|
1267
|
}
|
1268
|
}
|
1269
|
if (!state.isEndemic() && ! hasExternalPresence){
|
1270
|
String name = taxon.getName().getTitleCache();
|
1271
|
logger.error(state.getCurrentLine() +": Taxon ("+name+") is not endemic but has no non-cuban distribution" );
|
1272
|
}
|
1273
|
}
|
1274
|
|
1275
|
|
1276
|
/**
|
1277
|
* @param state
|
1278
|
* @param taxon
|
1279
|
* @param famStr
|
1280
|
* @param famRef
|
1281
|
* @return
|
1282
|
*/
|
1283
|
private Taxon makeAlternativeFamilyTaxon(CubaImportState state, String famStr, Reference famRef) {
|
1284
|
String key = famRef.getTitle() + ":"+ famStr;
|
1285
|
Taxon family = state.getHigherTaxon(key);
|
1286
|
if (family == null){
|
1287
|
IBotanicalName name = makeFamilyName(state, famStr);
|
1288
|
family = Taxon.NewInstance(name, famRef);
|
1289
|
state.putHigherTaxon(key, family);
|
1290
|
}
|
1291
|
|
1292
|
return family;
|
1293
|
}
|
1294
|
|
1295
|
|
1296
|
/**
|
1297
|
* @param state
|
1298
|
* @param famStr
|
1299
|
* @return
|
1300
|
*/
|
1301
|
private IBotanicalName makeFamilyName(CubaImportState state, String famStr) {
|
1302
|
IBotanicalName name = state.getFamilyName(famStr);
|
1303
|
if (name == null){
|
1304
|
name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
|
1305
|
name.setGenusOrUninomial(famStr);
|
1306
|
state.putFamilyName(famStr, name);
|
1307
|
name.addSource(makeOriginalSource(state));
|
1308
|
}
|
1309
|
return name;
|
1310
|
}
|
1311
|
|
1312
|
|
1313
|
/**
|
1314
|
* @param state
|
1315
|
* @return
|
1316
|
*/
|
1317
|
private TaxonNode getClassification(CubaImportState state) {
|
1318
|
Classification classification = state.getClassification();
|
1319
|
if (classification == null){
|
1320
|
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
1321
|
}
|
1322
|
TaxonNode rootNode = state.getRootNode();
|
1323
|
if (rootNode == null){
|
1324
|
rootNode = getTaxonNodeService().find(pteridophytaUuid);
|
1325
|
}
|
1326
|
if (rootNode == null){
|
1327
|
Reference sec = getSecReference(state);
|
1328
|
if (classification == null){
|
1329
|
String classificationName = state.getConfig().getClassificationName();
|
1330
|
//TODO
|
1331
|
Language language = Language.DEFAULT();
|
1332
|
classification = Classification.NewInstance(classificationName, sec, language);
|
1333
|
state.setClassification(classification);
|
1334
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
1335
|
classification.getRootNode().setUuid(rootUuid);
|
1336
|
}
|
1337
|
|
1338
|
IBotanicalName spermatophytaName = TaxonNameFactory.NewBotanicalInstance(Rank.DIVISION());
|
1339
|
spermatophytaName.setGenusOrUninomial("Spermatophyta");
|
1340
|
Taxon spermatophyta = Taxon.NewInstance(spermatophytaName, sec);
|
1341
|
TaxonNode spermatophytaNode = classification.addChildTaxon(spermatophyta, null, null);
|
1342
|
spermatophytaNode.setUuid(spermatophytaUuid);
|
1343
|
state.setRootNode(spermatophytaNode);
|
1344
|
getClassificationService().save(classification);
|
1345
|
|
1346
|
rootNode = spermatophytaNode;
|
1347
|
}
|
1348
|
return rootNode;
|
1349
|
}
|
1350
|
|
1351
|
|
1352
|
/**
|
1353
|
* @param record
|
1354
|
* @param originalKey
|
1355
|
* @return
|
1356
|
*/
|
1357
|
private String getValue(HashMap<String, String> record, String originalKey) {
|
1358
|
String value = record.get(originalKey);
|
1359
|
if (! StringUtils.isBlank(value)) {
|
1360
|
if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
|
1361
|
value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
|
1362
|
return value;
|
1363
|
}else{
|
1364
|
return null;
|
1365
|
}
|
1366
|
}
|
1367
|
|
1368
|
|
1369
|
|
1370
|
/**
|
1371
|
* Stores taxa records in DB
|
1372
|
*/
|
1373
|
@Override
|
1374
|
protected void firstPass(CubaImportState state) {
|
1375
|
boolean isSynonymOnly = false;
|
1376
|
|
1377
|
String line = state.getCurrentLine() + ": ";
|
1378
|
HashMap<String, String> record = state.getOriginalRecord();
|
1379
|
|
1380
|
Set<String> keys = record.keySet();
|
1381
|
for (String key: keys) {
|
1382
|
if (! expectedKeys.contains(key)){
|
1383
|
logger.warn(line + "Unexpected Key: " + key);
|
1384
|
}
|
1385
|
}
|
1386
|
|
1387
|
if (record.get("Fam. default") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
|
1388
|
//second header line, don't handle
|
1389
|
return;
|
1390
|
}
|
1391
|
|
1392
|
//Fam.
|
1393
|
TaxonNode familyTaxon = getFamilyTaxon(record, state);
|
1394
|
if (familyTaxon == null){
|
1395
|
if (record.get("Taxón") != null){
|
1396
|
logger.warn(line + "Family not recognized but taxon exists: " + record.get("Taxón"));
|
1397
|
return;
|
1398
|
}else if (record.get("Syn.") == null){
|
1399
|
logger.warn(line + "Family not recognized but also no synonym exists");
|
1400
|
return;
|
1401
|
}else{
|
1402
|
isSynonymOnly = true;
|
1403
|
}
|
1404
|
}
|
1405
|
|
1406
|
//Taxón
|
1407
|
Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonymOnly);
|
1408
|
if (taxon == null && ! isSynonymOnly){
|
1409
|
logger.warn(line + "taxon could not be created and is null");
|
1410
|
return;
|
1411
|
}
|
1412
|
state.setCurrentTaxon(taxon);
|
1413
|
|
1414
|
//Fam. ALT
|
1415
|
if (!isSynonymOnly){
|
1416
|
makeAlternativeFamilies(record, state, familyTaxon, taxon);
|
1417
|
}
|
1418
|
|
1419
|
//(Notas)
|
1420
|
makeNotes(record, state);
|
1421
|
|
1422
|
//Syn.
|
1423
|
makeSynonyms(record, state, !isSynonymOnly);
|
1424
|
|
1425
|
//End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
|
1426
|
makeCubanDistribution(record, state);
|
1427
|
|
1428
|
|
1429
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1430
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
1431
|
// "CuE","Gr","Ho","SC","Gu",
|
1432
|
makeProvincesDistribution(record, state);
|
1433
|
|
1434
|
// "Esp","Ja","PR","Men","Bah","Cay",
|
1435
|
// "AmN","AmC","AmS","VM"});
|
1436
|
makeOtherAreasDistribution(record, state);
|
1437
|
|
1438
|
validateTaxonIsAbsent(state, taxon);
|
1439
|
if (!isSynonymOnly){
|
1440
|
validateEndemic(state, taxon);
|
1441
|
}
|
1442
|
|
1443
|
state.setHighestStatusForTaxon(null);
|
1444
|
|
1445
|
return;
|
1446
|
}
|
1447
|
|
1448
|
|
1449
|
/**
|
1450
|
* @param state
|
1451
|
* @return
|
1452
|
*/
|
1453
|
private IdentifiableSource makeOriginalSource(CubaImportState state) {
|
1454
|
return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
|
1455
|
}
|
1456
|
/**
|
1457
|
* @param state
|
1458
|
* @return
|
1459
|
*/
|
1460
|
private DescriptionElementSource makeDescriptionSource(CubaImportState state) {
|
1461
|
return DescriptionElementSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
|
1462
|
}
|
1463
|
|
1464
|
private static Set<UUID> doubtfulStatus = new HashSet<>();
|
1465
|
|
1466
|
/**
|
1467
|
* @param status
|
1468
|
* @return
|
1469
|
*/
|
1470
|
private boolean isDoubtfulTerm(PresenceAbsenceTerm status) {
|
1471
|
if (doubtfulStatus.isEmpty()){
|
1472
|
doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyNaturalisedUuid);
|
1473
|
doubtfulStatus.add(CubaTransformer.doubtfulIndigenousDoubtfulUuid);
|
1474
|
doubtfulStatus.add(CubaTransformer.endemicDoubtfullyPresentUuid);
|
1475
|
doubtfulStatus.add(CubaTransformer.naturalisedDoubtfullyPresentUuid);
|
1476
|
doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyPresentUuid);
|
1477
|
doubtfulStatus.add(CubaTransformer.occasionallyCultivatedUuid);
|
1478
|
doubtfulStatus.add(CubaTransformer.rareCasualUuid);
|
1479
|
doubtfulStatus.add(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE().getUuid());
|
1480
|
doubtfulStatus.add(PresenceAbsenceTerm.CULTIVATED_PRESENCE_QUESTIONABLE().getUuid());
|
1481
|
}
|
1482
|
boolean isDoubtful = doubtfulStatus.contains(status.getUuid());
|
1483
|
return isDoubtful;
|
1484
|
}
|
1485
|
|
1486
|
|
1487
|
/**
|
1488
|
* @param area
|
1489
|
* @return
|
1490
|
*/
|
1491
|
private boolean isCubanArea(NamedArea area) {
|
1492
|
if (area.getUuid().equals(CubaTransformer.uuidCuba)){
|
1493
|
return true;
|
1494
|
}else if (area.getPartOf()!= null){
|
1495
|
return isCubanArea(area.getPartOf());
|
1496
|
}else{
|
1497
|
return false;
|
1498
|
}
|
1499
|
}
|
1500
|
|
1501
|
|
1502
|
/**
|
1503
|
* @param record
|
1504
|
* @param state
|
1505
|
* @param familyTaxon
|
1506
|
* @param taxon
|
1507
|
*/
|
1508
|
private void makeAlternativeFamilies(HashMap<String, String> record,
|
1509
|
CubaImportState state,
|
1510
|
TaxonNode familyTaxon,
|
1511
|
Taxon taxon) {
|
1512
|
|
1513
|
CubaImportConfigurator config = state.getConfig();
|
1514
|
|
1515
|
String famFRC = record.get("Fam. FRC");
|
1516
|
String famAS = record.get("Fam. A&S");
|
1517
|
String famFC = record.get("Fam. FC");
|
1518
|
String famSanchez2017 = record.get("Fam. Sánchez 2017");
|
1519
|
|
1520
|
if (config.isDoAltFlorasFRC()){
|
1521
|
Reference refFRC = makeReference(state, CubaTransformer.uuidRefFRC);
|
1522
|
makeSingleAlternativeFamily(state, taxon, famFRC, refFRC);
|
1523
|
}
|
1524
|
|
1525
|
if (config.isDoAltFlorasAS()){
|
1526
|
Reference refAS = makeReference(state, CubaTransformer.uuidRefAS);
|
1527
|
makeSingleAlternativeFamily(state, taxon, famAS, refAS);
|
1528
|
}
|
1529
|
|
1530
|
if (config.isDoAltFlorasFC()){
|
1531
|
Reference refFC = makeReference(state, CubaTransformer.uuidRefFC);
|
1532
|
makeSingleAlternativeFamily(state, taxon, famFC, refFC);
|
1533
|
}
|
1534
|
|
1535
|
if (config.isDoAltFlorasSanchez2017()){
|
1536
|
Reference refSanchez2017 = makeReference(state, CubaTransformer.uuidRefSanchez);
|
1537
|
makeSingleAlternativeFamily(state, taxon, famSanchez2017, refSanchez2017);
|
1538
|
}
|
1539
|
}
|
1540
|
|
1541
|
|
1542
|
/**
|
1543
|
* @param state
|
1544
|
* @param uuidreffrc
|
1545
|
* @return
|
1546
|
*/
|
1547
|
private Reference makeReference(CubaImportState state, UUID uuidRef) {
|
1548
|
Reference ref = state.getReference(uuidRef);
|
1549
|
if (ref == null){
|
1550
|
ref = getReferenceService().find(uuidRef);
|
1551
|
state.putReference(uuidRef, ref);
|
1552
|
}
|
1553
|
return ref;
|
1554
|
}
|
1555
|
|
1556
|
|
1557
|
/**
|
1558
|
* @param state
|
1559
|
* @param taxon
|
1560
|
* @param famString
|
1561
|
* @param famRef
|
1562
|
*/
|
1563
|
private void makeSingleAlternativeFamily(CubaImportState state, Taxon taxon, String famStr, Reference famRef) {
|
1564
|
if (isBlank(famStr)){
|
1565
|
famStr = "-";
|
1566
|
// return;
|
1567
|
}
|
1568
|
|
1569
|
TaxonDescription desc = getTaxonDescription(taxon, false, true);
|
1570
|
|
1571
|
// UUID altFamUuid1;
|
1572
|
UUID altFamUuid2;
|
1573
|
try {
|
1574
|
// altFamUuid1 = state.getTransformer().getFeatureUuid("Alt.Fam.");
|
1575
|
altFamUuid2 = state.getTransformer().getFeatureUuid("Alt.Fam.2");
|
1576
|
} catch (UndefinedTransformerMethodException e) {
|
1577
|
throw new RuntimeException(e);
|
1578
|
}
|
1579
|
|
1580
|
|
1581
|
Taxon famTaxon = makeAlternativeFamilyTaxon(state, famStr, famRef);
|
1582
|
|
1583
|
|
1584
|
//TextData //not used anymore
|
1585
|
// Feature feature1 = getFeature(state, altFamUuid1, "Families in other Floras (Text)", "Families in other Floras (Text)", "Other floras", null);
|
1586
|
// feature1.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
|
1587
|
//// TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
|
1588
|
// TextData textData = TextData.NewInstance(feature1, null, Language.DEFAULT(), null);
|
1589
|
// textData.putText(Language.SPANISH_CASTILIAN(), "Familias en otras Floras");
|
1590
|
// textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null, famTaxon.getName(),null);
|
1591
|
// desc.addElement(textData);
|
1592
|
|
1593
|
|
1594
|
|
1595
|
//TaxonInteraction
|
1596
|
Feature feature2 = getFeature(state, altFamUuid2, "Families in other Floras", "Families in other Floras", "Other floras", null);
|
1597
|
//feature should exist already
|
1598
|
// feature2.setSupportsTaxonInteraction(true);
|
1599
|
// feature2.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
|
1600
|
TaxonInteraction taxInteract = TaxonInteraction.NewInstance(feature2);
|
1601
|
taxInteract.setTaxon2(famTaxon);
|
1602
|
// taxInteract.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null);
|
1603
|
desc.addElement(taxInteract);
|
1604
|
|
1605
|
//Concept Relation //not used anymore
|
1606
|
// famTaxon.addTaxonRelation(taxon, TaxonRelationshipType.INCLUDES(), taxon.getSec(), null);
|
1607
|
|
1608
|
}
|
1609
|
|
1610
|
|
1611
|
|
1612
|
|
1613
|
|
1614
|
/**
|
1615
|
* @param record
|
1616
|
* @param state
|
1617
|
* @param taxon
|
1618
|
*/
|
1619
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1620
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
1621
|
// "CuE","Gr","Ho","SC","Gu",
|
1622
|
private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
|
1623
|
List<String> areaKeys = Arrays.asList(new String[]{
|
1624
|
"CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1625
|
"CuC","VC","Ci","SS","CA","Cam","LT",
|
1626
|
"CuE","Gr","Ho","SC","Gu",
|
1627
|
});
|
1628
|
for (String areaKey : areaKeys){
|
1629
|
state.setCubanProvince(true);
|
1630
|
makeSingleProvinceDistribution(areaKey, record, state);
|
1631
|
}
|
1632
|
}
|
1633
|
|
1634
|
private void makeOtherAreasDistribution(HashMap<String, String> record, CubaImportState state) {
|
1635
|
List<String> areaKeys = Arrays.asList(new String[]{
|
1636
|
"Esp","Ja","PR","Men","Bah","Cay",
|
1637
|
"AmN","AmC","AmS","VM"});
|
1638
|
for (String areaKey : areaKeys){
|
1639
|
state.setCubanProvince(false);
|
1640
|
makeSingleProvinceDistribution(areaKey, record, state);
|
1641
|
}
|
1642
|
}
|
1643
|
|
1644
|
|
1645
|
|
1646
|
|
1647
|
/**
|
1648
|
* @param areaKey
|
1649
|
* @param record
|
1650
|
* @param state
|
1651
|
* @param highestStatus
|
1652
|
* @return
|
1653
|
* @throws UndefinedTransformerMethodException
|
1654
|
*/
|
1655
|
private PresenceAbsenceTerm makeProvinceStatus(String areaKey,
|
1656
|
HashMap<String, String> record,
|
1657
|
CubaImportState state) throws UndefinedTransformerMethodException {
|
1658
|
|
1659
|
String statusStr = record.get(areaKey);
|
1660
|
if (statusStr == null){
|
1661
|
return null;
|
1662
|
}else{
|
1663
|
statusStr = statusStr.trim();
|
1664
|
}
|
1665
|
PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
|
1666
|
if (status == null){
|
1667
|
// PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
|
1668
|
if ("– 7".equals(statusStr)){
|
1669
|
statusStr = "–";
|
1670
|
}else if ("p 78".equals(statusStr)){
|
1671
|
statusStr = "p";
|
1672
|
}
|
1673
|
if (state.isCubanProvince() && isMinus(statusStr)){
|
1674
|
// getAbsenceTermForStatus(state, highestStatus);
|
1675
|
//we now handle cuban provinces same as external regions
|
1676
|
status = state.getTransformer().getPresenceTermByKey("--");
|
1677
|
}else if (! state.isCubanProvince() && isMinus(statusStr)){
|
1678
|
status = state.getTransformer().getPresenceTermByKey("--");
|
1679
|
}else{
|
1680
|
// logger.warn("Unhandled status str for provinces / external regions: " + statusStr);
|
1681
|
|
1682
|
UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
|
1683
|
if (statusUuid == null){
|
1684
|
if (! ("78".equals(statusStr)|| "1".equals(statusStr)||"8".equals(statusStr))){
|
1685
|
logger.error(state.getCurrentLine() + ": Undefined status str for provinces / external regions. No UUID given: '" + statusStr + "'");
|
1686
|
}
|
1687
|
}else{
|
1688
|
status = getPresenceTerm(state, statusUuid, statusStr, statusStr, statusStr, false);
|
1689
|
}
|
1690
|
}
|
1691
|
}
|
1692
|
|
1693
|
return status;
|
1694
|
}
|
1695
|
|
1696
|
|
1697
|
/**
|
1698
|
* @param highestStatus
|
1699
|
* @throws UndefinedTransformerMethodException
|
1700
|
*/
|
1701
|
private PresenceAbsenceTerm getAbsenceTermForStatus(CubaImportState state, PresenceAbsenceTerm highestStatus) throws UndefinedTransformerMethodException {
|
1702
|
if (highestStatus == null){
|
1703
|
logger.warn(state.getCurrentLine() + ": Highest status not defined");
|
1704
|
return null;
|
1705
|
}
|
1706
|
PresenceAbsenceTerm result = null;
|
1707
|
if (highestStatus.equals(getStatus(state, "E"))){
|
1708
|
result = getStatus(state, "-E");
|
1709
|
}else if (highestStatus.getUuid().equals(state.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus.equals(PresenceAbsenceTerm.NATIVE())){
|
1710
|
result = getStatus(state, "-Ind.");
|
1711
|
}else if (highestStatus.equals(getStatus(state, "Ind.?"))){
|
1712
|
result = getStatus(state, "-Ind.?"); //TODO
|
1713
|
}else if (highestStatus.equals(getStatus(state, "N"))){
|
1714
|
result = getStatus(state, "-N");
|
1715
|
}else if (highestStatus.equals(getStatus(state, "P"))){
|
1716
|
result = getStatus(state, "-P");
|
1717
|
}else if (highestStatus.equals(getStatus(state, "A"))){
|
1718
|
result = getStatus(state, "-A");
|
1719
|
}else if (highestStatus.equals(getStatus(state, "C"))){
|
1720
|
result = getStatus(state, "-C");
|
1721
|
}
|
1722
|
logger.warn(state.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus.getTitleCache());
|
1723
|
return result;
|
1724
|
}
|
1725
|
|
1726
|
|
1727
|
/**
|
1728
|
* @param string
|
1729
|
* @return
|
1730
|
* @throws UndefinedTransformerMethodException
|
1731
|
*/
|
1732
|
private PresenceAbsenceTerm getStatus(CubaImportState state, String key) throws UndefinedTransformerMethodException {
|
1733
|
PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(key);
|
1734
|
if (status == null){
|
1735
|
UUID statusUuid = state.getTransformer().getPresenceTermUuid(key);
|
1736
|
status = getPresenceTerm(state, statusUuid, null, null, null, false);
|
1737
|
}
|
1738
|
return status;
|
1739
|
}
|
1740
|
|
1741
|
|
1742
|
/**
|
1743
|
* Stores parent-child, synonym and common name relationships
|
1744
|
*/
|
1745
|
@Override
|
1746
|
protected void secondPass(CubaImportState state) {
|
1747
|
// CyprusRow cyprusRow = state.getCyprusRow();
|
1748
|
return;
|
1749
|
}
|
1750
|
|
1751
|
|
1752
|
@Override
|
1753
|
protected boolean isIgnore(CubaImportState state) {
|
1754
|
return ! state.getConfig().isDoTaxa();
|
1755
|
}
|
1756
|
|
1757
|
@Override
|
1758
|
protected boolean doCheck(CubaImportState state) {
|
1759
|
logger.warn("DoCheck not yet implemented for CubaExcelImport");
|
1760
|
return true;
|
1761
|
}
|
1762
|
|
1763
|
}
|