Merge branch 'master' of ssh://dev.e-taxonomy.eu/var/git/cdmlib-apps
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / cuba / CubaExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.cuba;
11
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.List;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.StringUtils;
23 import org.apache.log4j.Logger;
24 import org.springframework.stereotype.Component;
25
26 import eu.etaxonomy.cdm.common.CdmUtils;
27 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28 import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
29 import eu.etaxonomy.cdm.model.agent.Person;
30 import eu.etaxonomy.cdm.model.agent.Team;
31 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32 import eu.etaxonomy.cdm.model.common.Annotation;
33 import eu.etaxonomy.cdm.model.common.AnnotationType;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
38 import eu.etaxonomy.cdm.model.common.Representation;
39 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
40 import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
41 import eu.etaxonomy.cdm.model.description.Distribution;
42 import eu.etaxonomy.cdm.model.description.Feature;
43 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
44 import eu.etaxonomy.cdm.model.description.TaxonDescription;
45 import eu.etaxonomy.cdm.model.description.TaxonInteraction;
46 import eu.etaxonomy.cdm.model.description.TextData;
47 import eu.etaxonomy.cdm.model.location.NamedArea;
48 import eu.etaxonomy.cdm.model.name.BotanicalName;
49 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
50 import eu.etaxonomy.cdm.model.name.NameRelationship;
51 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
52 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
53 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
54 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
55 import eu.etaxonomy.cdm.model.name.Rank;
56 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
57 import eu.etaxonomy.cdm.model.reference.Reference;
58 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
59 import eu.etaxonomy.cdm.model.taxon.Classification;
60 import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
61 import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
62 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
63 import eu.etaxonomy.cdm.model.taxon.Taxon;
64 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
65 import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
66 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
67 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
68
69 /**
70 * @author a.mueller
71 * @created 05.01.2016
72 */
73
74 @Component
75 public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
//serialization version id of this import class
private static final long serialVersionUID = -747486709409732371L;
//class logger (log4j)
private static final Logger logger = Logger.getLogger(CubaExcelImport.class);

//regex fragment: whitespace + "homon" + at most one further character at the end of the string
private static final String HOMONYM_MARKER = "\\s+homon.?$";
//regex fragment: a leading question mark followed by at most one whitespace character
private static final String DOUBTFUL_MARKER = "^\\?\\s?";


//NOTE(review): usage of the following 2 uuids is not visible in this part of the file;
//presumably they identify the classification root and the "Plantae" node - confirm
private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");

//shared name parser, used by makeSynonyms for simple (misapplied, invalid, sphalm.) names
private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
//NOTE(review): not referenced in the visible part of the file
private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;

//column headers of the Excel sheet: 4 family columns, taxon, notes, synonym,
//the status columns read by makeCubanStatuss ("End" ... "Cult C") and further columns
//NOTE(review): presumably used to validate the headers of incoming records - the check is not visible here
private static List<String> expectedKeys= Arrays.asList(new String[]{
"Fam. default","Fam. FRC","Fam. A&S","Fam. FC",
"Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
92
93 @Override
94 protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
95 //we do everything in firstPass here
96 return;
97 }
98
99
100 /**
101 * @param record
102 * @param state
103 * @param taxon
104 */
105 private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
106 try {
107 NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("Cu"), null, null, null, null, null);
108 TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
109 List<PresenceAbsenceTerm> statuss = makeCubanStatuss(record, state);
110 for (PresenceAbsenceTerm status : statuss){
111 Distribution distribution = Distribution.NewInstance(cuba, status);
112 desc.addElement(distribution);
113 distribution.addSource(makeDescriptionSource(state));
114 }
115 } catch (UndefinedTransformerMethodException e) {
116 e.printStackTrace();
117 }
118 }
119
120
121 /**
122 * @param record
123 * @param state
124 * @return
125 * @throws UndefinedTransformerMethodException
126 */
127 private List<PresenceAbsenceTerm> makeCubanStatuss(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
128 PresenceAbsenceTerm highestStatus = null;
129
130 String line = state.getCurrentLine() + ": ";
131 List<PresenceAbsenceTerm> result = new ArrayList<>();
132
133 String endemicStr = getValue(record, "End");
134 String indigenousStr = getValue(record, "Ind");
135 String indigenousDoubtStr = getValue(record, "Ind? D");
136 String naturalisedStr = getValue(record, "Nat");
137 String dudStr = getValue(record, "Dud P");
138 String advStr = getValue(record, "Adv");
139 String cultStr = getValue(record, "Cult C");
140
141 state.setEndemic(false);
142
143 if (endemicStr != null){
144 if(endemicStr.equals("+")){
145 PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
146 result.add(endemicState);
147 highestStatus = endemicState;
148 state.setEndemic(true);
149 }else if(isMinus(endemicStr)){
150 UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
151 PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
152 result.add(endemicState);
153 checkAbsentHighestState(highestStatus, line, "endemic", false);
154 }else if(endemicStr.equals("?")){
155 UUID endemicDoubtfulUuid = state.getTransformer().getPresenceTermUuid("?E");
156 PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicDoubtfulUuid, null, null, null, false);
157 result.add(endemicState);
158 checkAbsentHighestState(highestStatus, line, "endemic", false);
159 }else{
160 logger.warn(line + "Endemic not recognized: " + endemicStr);
161 }
162 }
163 if (indigenousStr != null){
164 if(indigenousStr.equals("+")){
165 PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("Ind.");
166 // PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
167 result.add(indigenousState);
168 highestStatus = highestStatus != null ? highestStatus : indigenousState;
169 }else if(isMinus(indigenousStr)){
170 PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("-Ind.");
171 result.add(indigenousState);
172 checkAbsentHighestState(highestStatus, line, "indigenous", false);
173 }else if(indigenousStr.equals("?")){
174 PresenceAbsenceTerm indigenousDoubtState = state.getTransformer().getPresenceTermByKey("?Ind.");
175 // PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
176 result.add(indigenousDoubtState);
177 checkAbsentHighestState(highestStatus, line, "indigenous", true);
178 }else{
179 logger.warn(line + "Indigenous not recognized: " + indigenousStr);
180 }
181 }
182 if(indigenousDoubtStr != null){
183 if(indigenousDoubtStr.equals("D")){
184 PresenceAbsenceTerm doubtIndigenousState = state.getTransformer().getPresenceTermByKey("Ind.?");
185 // PresenceAbsenceTerm doubtIndigenousState = getPresenceTerm(state, doubtIndigenousUuid, null, null, null, false);
186 result.add(doubtIndigenousState);
187 highestStatus = highestStatus != null ? highestStatus : doubtIndigenousState;
188 }else if(isMinus(indigenousDoubtStr)){
189 UUID doubtIndigenousErrorUuid = state.getTransformer().getPresenceTermUuid("-Ind.?");
190 PresenceAbsenceTerm doubtIndigenousErrorState = getPresenceTerm(state, doubtIndigenousErrorUuid, null, null, null, false);
191 result.add(doubtIndigenousErrorState);
192 checkAbsentHighestState(highestStatus, line, "doubtfully indigenous", true);
193 }else{
194 logger.warn(line + "doubtfully indigenous not recognized: " + indigenousDoubtStr);
195 }
196 }
197 if(naturalisedStr != null){
198 if(naturalisedStr.equals("N")){
199 PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
200 result.add(haturalizedState);
201 highestStatus = highestStatus != null ? highestStatus : haturalizedState;
202 }else if(isMinus(naturalisedStr)){
203 UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
204 PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
205 result.add(naturalisedErrorState);
206 checkAbsentHighestState(highestStatus, line, "naturalized", false);
207 }else if(naturalisedStr.equals("?")){
208 UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
209 PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
210 result.add(naturalisedDoubtState);
211 checkAbsentHighestState(highestStatus, line, "naturalized", true);
212 }else{
213 logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
214 }
215 }
216 if(dudStr != null){
217 if(dudStr.equals("P")){
218 UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
219 PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
220 result.add(dudState);
221 highestStatus = highestStatus != null ? highestStatus : dudState;
222 }else if(isMinus(dudStr)){
223 UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
224 PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
225 result.add(nonNativeErrorState);
226 checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", false);
227 }else if(dudStr.equals("?")){
228 UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
229 PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
230 result.add(naturalisedDoubtState);
231 checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", true);
232 }else{
233 logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
234 }
235 }
236 if(advStr != null){
237 if(advStr.equals("A")){
238 PresenceAbsenceTerm advState = state.getTransformer().getPresenceTermByKey("Adv.");
239 // PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
240 result.add(advState);
241 highestStatus = highestStatus != null ? highestStatus : advState;
242 }else if(isMinus(advStr)){
243 UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
244 PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
245 result.add(advState);
246 checkAbsentHighestState(highestStatus, line, "adventive", false);
247 }else if(advStr.equals("(A)")){
248 UUID rareCasualUuid = state.getTransformer().getPresenceTermUuid("(A)");
249 PresenceAbsenceTerm rareCasual = getPresenceTerm(state, rareCasualUuid, null, null, null, false);
250 result.add(rareCasual);
251 }else{
252 logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
253 }
254 }else if(cultStr != null){
255 if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
256 logger.warn("'cultivated' not recognized: " + cultStr);
257 }else if(cultStr.equals("C")){
258 PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
259 result.add(cultivatedState);
260 highestStatus = highestStatus != null ? highestStatus : cultivatedState;
261 }else if(cultStr.equals("?")){
262 PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
263 result.add(cultivatedState);
264 checkAbsentHighestState(highestStatus, line, "cultivated", true);
265 }else if(cultStr.equals("(C)")){
266 UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
267 PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
268 result.add(cultivatedState);
269 }else if(isMinus(cultStr)){
270 PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
271 result.add(cultivatedState);
272 checkAbsentHighestState(highestStatus, line, "cultivated", false);
273 }else{
274 logger.warn(line + "'cultivated' not recognized: " + cultStr);
275 }
276 }
277 state.setHighestStatusForTaxon(highestStatus);
278 return result;
279 }
280
281
282 /**
283 * @param highestStatus
284 * @param line
285 */
286 private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus, String line, String stateLabel, boolean doubtful) {
287 //can be removed, highest status is not used anymore
288 if (highestStatus == null){
289 String absentStr = doubtful ? "doubtful" : "absent";
290 logger.info(line + "Highest cuban state is " + absentStr + " " + stateLabel);
291 }
292
293 }
294
295
296 /**
297 * @param indigenousStr
298 * @return
299 */
300 private boolean isMinus(String str) {
301 return str.equals("-") || str.equals("–") || str.equals("‒");
302 }
303
304
305 /**
306 * @param indigenousStr
307 * @return
308 */
309 private boolean checkPlusMinusDoubt(String str) {
310 return str.equals("+") || isMinus(str)|| str.equals("?");
311 }
312
313
314 /**
315 * @param indigenousStr
316 * @param indigenousDoubtStr
317 * @param naturalisedStr
318 * @param dudStr
319 * @param advStr
320 * @param cultStr
321 */
322 private boolean checkAllNull(String ... others) {
323 for (String other : others){
324 if (other != null){
325 return false;
326 }
327 }
328 return true;
329 }
330
331
//accepted (homotypic) entry: at least 6 characters, none of them [ ] “ ”, enclosed in round brackets;
//group 1 = the text within the brackets
private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
// String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
// + "(\\((.{6,})\\))?";
//heterotypic entry: group 1 = at least 5 characters without ( [ ] “ ”,
//group 2 = optional part in round brackets, group 3 = the text within these brackets (at least 6 characters)
private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
+"(\\((.{6,})\\))?";
//experimental variant of the heterotypic expression;
//NOTE(review): not referenced in the visible part of the file
private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
+"(\\((.{6,})\\))?";
//"auct." optionally followed by one of the listed source/author abbreviations and an optional "p. p.";
//NOTE(review): "\\sEngl\\.||\\sMaza" contains an empty alternative (double pipe) - presumably a typo,
//it should not change the result as the whole group is optional anyway - confirm
private static final String auctRegExStr = "auct\\."
+"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.(\\sFC-S|\\sA&S)?|\\sWright"
+ "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
+ "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";


//NOTE(review): the following 3 expressions use ".*{5,}" (2 stacked quantifiers); presumably ".{5,}"
//(at least 5 characters) was intended - verify how java.util.regex interprets the stacked form
//misapplied name: optional "? " (group 1), name in “ ” (group 2), then auct. or "sensu ..." part (group 3)
private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
//name in “ ” (group 1) followed by an optionally prefixed/suffixed "sphalm."
private static final String sphalmRegExStr = "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
//name in “ ” (group 1) followed by "nom. inval." and an optional source (group 2)
private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
//optional trailing part in square brackets, appended to the accepted and the heterotypic expression
private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";

//compiled patterns used by makeSynonyms; the homonym part becomes the last group
//(group 2 for accepted, group 4 for heterotypic)
private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
private static final Pattern sphalmRegEx = Pattern.compile(sphalmRegExStr);
355
356 /**
357 * @param record
358 * @param state
359 * @param taxon
360 */
361 private void makeSynonyms(HashMap<String, String> record, CubaImportState state, boolean isFirstSynonym) {
362 // boolean forAccepted = true;
363 String synonymStr = record.get("Syn.");
364 String line = state.getCurrentLine() + ": ";
365
366
367 if (synonymStr == null){
368 //TODO test that this is not a synonym only line
369 return;
370 }
371
372 if (state.getCurrentTaxon() == null){
373 logger.error(line + "Current taxon is null for synonym");
374 return;
375 }
376
377
378 synonymStr = synonymStr.trim();
379 synonymStr = synonymStr.replace("[taxon]", "[infraspec.]");
380
381 // String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
382 // String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
383
384 // Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
385
386
387 Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
388 Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
389 Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
390 Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
391 Matcher sphalmMatcher = sphalmRegEx.matcher(synonymStr);
392
393 List<BotanicalName> homonyms = new ArrayList<>();
394 if (missapliedMatcher.matches()){
395 boolean doubtful = missapliedMatcher.group(1) != null;
396 String firstPart = missapliedMatcher.group(2);
397 BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
398 name.addSource(makeOriginalSource(state));
399
400 String secondPart = missapliedMatcher.group(3);
401 Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
402 misappliedNameTaxon.addSource(makeOriginalSource(state));
403 misappliedNameTaxon.setDoubtful(doubtful);
404 if (secondPart.startsWith("sensu")){
405 secondPart = secondPart.substring(5).trim();
406 if (secondPart.contains(" ")){
407 logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
408 }
409 Reference<?> sensu = ReferenceFactory.newGeneric();
410 Team team = Team.NewTitledInstance(secondPart, null);
411 sensu.setAuthorship(team);
412 misappliedNameTaxon.setSec(sensu);
413 }else if (secondPart.matches(auctRegExStr)){
414 secondPart = secondPart.replace("p. p.", "p.p.");
415 misappliedNameTaxon.setAppendedPhrase(secondPart);
416 }else{
417 logger.warn(line + "Misapplied second part not recognized: " + secondPart);
418 }
419 //TODO
420 Reference<?> relRef = null;
421 state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
422 }else if (nomInvalMatcher.matches()){
423 String firstPart = nomInvalMatcher.group(1);
424 String afterInval = nomInvalMatcher.group(2);
425 if (StringUtils.isNotBlank(afterInval)){
426 logger.warn(state.getCurrentLine() + ": After inval to be implemented: " + afterInval);
427 }
428 BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
429 name.addSource(makeOriginalSource(state));
430 NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
431 name.addStatus(status);
432 SynonymRelationship sr = state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
433 sr.getSynonym().addSource(makeOriginalSource(state));
434 }else if (sphalmMatcher.matches()){
435 String firstPart = sphalmMatcher.group(1);
436 String sphalmPart = synonymStr.replace(firstPart, "").replace("“","").replace("”","").trim();
437 BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
438 // NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
439 // name.addStatus(status);
440 name.addSource(makeOriginalSource(state));
441 SynonymRelationship sr = state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
442 sr.getSynonym().setAppendedPhrase(sphalmPart);
443 sr.getSynonym().setSec(null);
444 sr.getSynonym().addSource(makeOriginalSource(state));
445 }else if (acceptedMatcher.matches()){
446 String firstPart = acceptedMatcher.group(1);
447 String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
448 List<BotanicalName> list = handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
449 checkFirstSynonym(state, list, isFirstSynonym, synonymStr, false);
450 }else if(heterotypicMatcher.matches()){
451 String firstPart = heterotypicMatcher.group(1).trim();
452 String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
453 String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
454 boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
455 firstPart = replaceHomonIlleg(firstPart);
456 boolean isHomonym = firstPart.matches(".*" + HOMONYM_MARKER);
457 BotanicalName synName = makeName(state, firstPart);
458 if (synName.isProtectedTitleCache()){
459 logger.warn(line + "Heterotypic base synonym could not be parsed correctly: " + firstPart);
460 }
461 if (isHomonym){
462 homonyms.add(synName);
463 }
464 SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
465 sr.getSynonym().setDoubtful(isDoubtful);
466 sr.getSynonym().addSource(makeOriginalSource(state));
467 List<BotanicalName> list = handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
468 checkFirstSynonym(state, list, isFirstSynonym, synonymStr, true);
469
470 }else if (isSpecialHeterotypic(synonymStr)){
471 BotanicalName synName = makeName(state, synonymStr);
472 if (synName.isProtectedTitleCache()){
473 logger.warn(line + "Special heterotypic synonym could not be parsed correctly:" + synonymStr);
474 }
475 SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
476 sr.getSynonym().addSource(makeOriginalSource(state));
477 }else{
478 logger.warn(line + "Synonym entry does not match: " + synonymStr);
479 }
480 }
481
482 /**
483 * @param state
484 * @param list
485 * @param isFirstSynonym
486 * @param synonymStr
487 * @param b
488 */
489 private void checkFirstSynonym(CubaImportState state, List<BotanicalName> list, boolean isFirstSynonym, String synonymStr, boolean isHeterotypicMatcher) {
490 if (!isFirstSynonym){
491 return;
492 }
493 String line = state.getCurrentLine() + ": ";
494 BotanicalName currentName = isHeterotypicMatcher? (BotanicalName)state.getCurrentTaxon().getName(): list.get(0);
495 boolean currentHasBasionym = currentName.getBasionymAuthorship() != null;
496 BotanicalName firstSynonym = isHeterotypicMatcher ? list.get(0): list.get(1);
497 // if (list.size() <= 1){
498 // logger.error(line + "homotypic list size is 1 but shouldn't");
499 // return;
500 // }
501 if (isHeterotypicMatcher && currentHasBasionym){
502 logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has basionym author but has no homotypic basionym , but : " + synonymStr);
503 }else if (isHeterotypicMatcher){
504 //first synonym must not have a basionym author
505 if (firstSynonym.getBasionymAuthorship() != null){
506 logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has no basionym but first synonym requires basionym : " + synonymStr);
507 }
508 }else{ //isAcceptedMatcher
509 if (currentHasBasionym){
510 if (! matchAuthor(currentName.getBasionymAuthorship(), firstSynonym.getCombinationAuthorship())){
511 logger.info(line + "Current basionym author and first synonym combination author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
512 }
513 }else{
514 if (! matchAuthor(currentName.getCombinationAuthorship(), firstSynonym.getBasionymAuthorship())){
515 logger.info(line + "Current combination author and first synonym basionym author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
516 }
517 }
518 }
519
520 }
521
522
523 /**
524 * @param synonymStr
525 * @return
526 */
527 private boolean isSpecialHeterotypic(String synonymStr) {
528 if (synonymStr == null){
529 return false;
530 }else if (synonymStr.equals("Rhynchospora prenleloupiana (‘prenteloupiana’) Boeckeler")){
531 return true;
532 }else if (synonymStr.equals("Psidium longipes var. orbiculare (O.Berg) McVaugh")){
533 return true;
534 }
535 return false;
536 }
537
538
539 /**
540 * @param areaKey
541 * @param record
542 * @param state
543 * @param taxon
544 */
545 private void makeSingleProvinceDistribution(String areaKey,
546 HashMap<String, String> record,
547 CubaImportState state) {
548 try {
549 UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
550 if (areaUuid == null){
551 logger.warn("Area not recognized: " + areaKey);
552 return;
553 }
554 if (record.get(areaKey)==null){
555 return; //no status defined
556 }
557
558 NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
559 if (area == null){
560 logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
561 }
562 TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
563 PresenceAbsenceTerm status = makeProvinceStatus(areaKey, record, state);
564 if (status == null){
565 logger.warn(state.getCurrentLine() + ": Province distribution status could not be defined: " + record.get(areaKey));
566 }
567 Distribution distribution = Distribution.NewInstance(area, status);
568 desc.addElement(distribution);
569 distribution.addSource(makeDescriptionSource(state));
570 } catch (UndefinedTransformerMethodException e) {
571 e.printStackTrace();
572 }
573
574 }
575
576
577 /**
578 * @param synonymStr
579 * @param state
580 * @param homonyms
581 * @param homonymPart
582 * @param isDoubtful
583 * @param taxon
584 * @param homotypicalGroup
585 */
586 private List<BotanicalName> handleHomotypicGroup(String homotypicStrOrig,
587 CubaImportState state,
588 BotanicalName homotypicName,
589 boolean isHeterotypic,
590 List<BotanicalName> homonyms,
591 String homonymPart,
592 boolean isDoubtful) {
593
594 List<BotanicalName> homotypicNameList = new ArrayList<>();
595 homotypicNameList.add(homotypicName);
596
597 String homotypicStr = homotypicStrOrig;
598 if (homotypicStr == null){
599 return homotypicNameList;
600 }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
601 homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
602 }
603
604 HomotypicalGroup homotypicGroup = homotypicName.getHomotypicalGroup();
605 String[] splits = homotypicStr.split("\\s*,\\s*");
606 for (String split : splits){
607 split = replaceHomonIlleg(split);
608 boolean isHomonym = split.matches(".*" + HOMONYM_MARKER);
609 BotanicalName newName = makeName(state, split);
610 newName.setHomotypicalGroup(homotypicGroup); //not really necessary as this is later set anyway
611 if (newName.isProtectedTitleCache()){
612 logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
613 }
614 if (isHomonym){
615 homonyms.add(newName);
616 }
617 if (isHeterotypic){
618 SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicGroup, null, null);
619 sr.getSynonym().setDoubtful(isDoubtful);
620 sr.getSynonym().addSource(makeOriginalSource(state));
621 // newName.addBasionym(homotypicName);
622 }else{
623 state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
624 }
625 handleBasionym(state, homotypicNameList, homonyms, newName);
626 homotypicNameList.add(newName);
627 }
628 makeHomonyms(homonyms, homonymPart, state, homotypicGroup);
629 return homotypicNameList;
630 }
631
632
633 /**
634 * @param split
635 * @return
636 */
637 private String replaceHomonIlleg(String split) {
638 String result = split.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
639 return result;
640 }
641
642
643 /**
644 * @param homonyms
645 * @param homonymPart
646 * @param state
647 * @param currentBasionym
648 */
649 private void makeHomonyms(List<BotanicalName> homonyms, String homonymPartOrig, CubaImportState state,
650 HomotypicalGroup homotypicGroup) {
651 String line = state.getCurrentLine() + ": ";
652 String homonymPart = homonymPartOrig == null ? "" : homonymPartOrig.trim();
653 if (homonyms.isEmpty() && homonymPart.equals("")){
654 return;
655 }else if (homonymPart.equals("")){
656 logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
657 return;
658 }
659 homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
660 String[] splits = homonymPart.split("\\]\\s*\\[");
661 if (splits.length != homonyms.size()){
662 if(homonyms.size() == 0 && splits.length >= 1){
663 handleSimpleBlockingNames(splits, state, homotypicGroup);
664 }else{
665 logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
666 }
667 return;
668 }
669 int i = 0;
670 for (String split : splits){
671 split = split.replaceAll("^non\\s+", "");
672 BotanicalName newName = makeName(state, split);
673 // BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
674 if (newName.isProtectedTitleCache()){
675 logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
676 }
677 homonyms.get(i).addRelationshipToName(newName, NameRelationshipType.LATER_HOMONYM(), null);
678 i++;
679 }
680 }
681
682 /**
683 * @param homonymPart
684 * @param state
685 * @param homotypicGroup
686 */
687 private void handleSimpleBlockingNames(String[] splitsi,
688 CubaImportState state,
689 HomotypicalGroup homotypicGroup) {
690 List<BotanicalName> replacementNameCandidates = new ArrayList<>();
691 for (String spliti : splitsi){
692
693 String split = spliti.replaceAll("^non\\s+", "");
694 BotanicalName newName = makeName(state, split);
695 if (newName.isProtectedTitleCache()){
696 logger.warn(state.getCurrentLine() + ": blocking name could not be parsed: " + split);
697 }
698 Set<BotanicalName> typifiedNames = (Set)homotypicGroup.getTypifiedNames();
699 Set<BotanicalName> candidates = new HashSet<>();
700 for (BotanicalName name : typifiedNames){
701 if (name.getGenusOrUninomial() != null && name.getGenusOrUninomial().equals(newName.getGenusOrUninomial())){
702 if (name.getStatus().isEmpty() || ! name.getStatus().iterator().next().getType().equals(NomenclaturalStatusType.ILLEGITIMATE())){
703 candidates.add(name);
704 }
705 }
706 }
707 if (candidates.size() == 1){
708 BotanicalName blockedName = candidates.iterator().next();
709 newName.addRelationshipToName(blockedName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
710 replacementNameCandidates.add(blockedName);
711 }else{
712 logger.warn(state.getCurrentLine() + ": Blocking name could not be handled. " + candidates.size() + " candidates.");
713 }
714 }
715 makeReplacedSynonymIfPossible(state, homotypicGroup, replacementNameCandidates);
716 }
717
718 /**
719 * @param homotypicGroup
720 * @param replacementNameCandidates
721 */
722 private void makeReplacedSynonymIfPossible(CubaImportState state,
723 HomotypicalGroup homotypicGroup,
724 List<BotanicalName> replacementNameCandidates) {
725 String line = state.getCurrentLine() +": ";
726 List<BotanicalName> replacedCandidates = new ArrayList<>();
727 for (TaxonNameBase<?, ?> typifiedName : homotypicGroup.getTypifiedNames()){
728 BotanicalName candidate = (BotanicalName)typifiedName;
729 if (candidate.getBasionymAuthorship() == null){
730 if (candidate.getStatus().isEmpty()){
731 if (! replacementNameCandidates.contains(candidate)){
732 replacedCandidates.add(candidate);
733 }
734 }
735 }
736 }
737 if (replacedCandidates.size() == 1){
738 BotanicalName replacedSynonym = replacedCandidates.iterator().next();
739 for (BotanicalName replacementName : replacementNameCandidates){
740 replacementName.addReplacedSynonym(replacedSynonym, null, null, null);
741 }
742 }else if (replacedCandidates.size() < 1){
743 logger.warn(line + "No replaced synonym candidate found");
744 }else{
745 logger.warn(line + "More than 1 ("+replacedCandidates.size()+") replaced synonym candidates found");
746 }
747 }
748
749
750 /**
751 * @param homotypicGroup
752 * @param newName
753 */
754 private void handleBasionym(CubaImportState state, List<BotanicalName> homotypicNameList,
755 List<BotanicalName> homonyms, BotanicalName newName) {
756 for (BotanicalName existingName : homotypicNameList){
757 if (existingName != newName){ //should not happen anymore, as new name is added later
758 boolean onlyIfNotYetExists = true;
759 createBasionymRelationIfPossible(state, existingName, newName, homonyms.contains(newName), onlyIfNotYetExists);
760 }
761 }
762 }
763
764 /**
765 * @param state
766 * @param name1
767 * @param name2
768 * @return
769 */
770 private void createBasionymRelationIfPossible(CubaImportState state, BotanicalName name1, BotanicalName name2,
771 boolean name2isHomonym, boolean onlyIfNotYetExists) {
772 BotanicalName basionymName = name1;
773 BotanicalName newCombination = name2;
774 //exactly one name must have a basionym author
775 if (name1.getBasionymAuthorship() == null && name2.getBasionymAuthorship() == null
776 || name1.getBasionymAuthorship() != null && name2.getBasionymAuthorship() != null){
777 return;
778 }
779
780 //switch order if necessary
781 if (! name2isHomonym && basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
782 basionymName = name2;
783 newCombination = name1;
784 }
785 if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())
786 && matchLastNamePart(basionymName, newCombination)){
787 newCombination.addBasionym(basionymName);
788 }else{
789 if ( (newCombination.getBasionyms().isEmpty() || ! onlyIfNotYetExists)
790 && isLegitimate(basionymName)
791 && ! name2isHomonym){
792 logger.info(state.getCurrentLine() + ": Names are potential basionyms but either author or name part do not match: " + basionymName.getTitleCache() + " <-> " + newCombination.getTitleCache());
793 }
794 }
795 }
796
797 /**
798 * @param basionymName
799 * @return
800 */
801 private boolean isLegitimate(BotanicalName basionymName) {
802 for (NomenclaturalStatus nomStatus : basionymName.getStatus()){
803 if (nomStatus.getType()!= null && nomStatus.getType().isIllegitimateType()){
804 return false;
805 }
806 }
807 for (NameRelationship nameRel : basionymName.getNameRelations()){
808 if (nameRel.getType()!= null && nameRel.getType().isIllegitimateType()){
809 return false;
810 }
811 }
812 return true;
813 }
814
815
816 /**
817 * @param basionymName
818 * @param newCombination
819 * @return
820 */
821 private boolean matchLastNamePart(BotanicalName name1, BotanicalName name2) {
822 String lastNamePart1 = name1.getLastNamePart();
823 String lastNamePart2 = name2.getLastNamePart();
824 if (lastNamePart1 != null && lastNamePart2 != null){
825 lastNamePart1 = normalizeBasionymNamePart(lastNamePart1);
826 lastNamePart2 = normalizeBasionymNamePart(lastNamePart2);
827 return (lastNamePart1.equals(lastNamePart2));
828 }else{
829 return false;
830 }
831 }
832
833 /**
834 * @param lastNamePart1
835 * @return
836 */
837 private String normalizeBasionymNamePart(String lastNamePart) {
838 String namePart = lastNamePart.toLowerCase()
839 .replaceAll("(um|us|a|is|e|os|on|or)$", "")
840 .replaceAll("er$", "r") //e.g. ruber <-> rubra
841 .replaceAll("ese$", "s"); //e.g. cayanensis <-> cayanenese
842 //TODO tampensis / tampense
843 return namePart;
844 }
845
846
847 /**
848 * @param combinationAuthorship
849 * @param basi
850 * @return
851 */
852 private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
853 if (author1 == null || author2 == null){
854 return false;
855 }else {
856 return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
857 }
858 }
859
860
861 /**
862 * @param record
863 * @param state
864 * @param taxon
865 */
866 private void makeNotes(HashMap<String, String> record, CubaImportState state) {
867 String notesStr = getValue(record, "(Notas)");
868 if (notesStr == null){
869 return;
870 }else{
871 Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
872 //TODO
873 annotation.setAnnotationType(AnnotationType.TECHNICAL());
874 state.getCurrentTaxon().addAnnotation(annotation);
875 }
876 }
877
878
879 /**
880 * @param record
881 * @param state
882 * @param familyTaxon
883 * @return
884 */
885 private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
886 String taxonStrOrig = getValue(record, "Taxón");
887 if (taxonStrOrig == null){
888 return isSynonym ? state.getCurrentTaxon() : null;
889 }
890
891 boolean isAbsent = false;
892 String taxonStr = taxonStrOrig;
893 if (taxonStrOrig.startsWith("[") && taxonStrOrig.endsWith("]")){
894 taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
895 isAbsent = true;
896 }
897
898 boolean isAuct = false;
899 if (taxonStr.endsWith("auct.")){
900 isAuct = true;
901 taxonStr.replace("auct.", "").trim();
902 }
903 state.setTaxonIsAbsent(isAbsent);
904 BotanicalName botanicalName = makeName(state, taxonStr);
905 Reference<?> sec = getSecReference(state);
906 Taxon taxon = Taxon.NewInstance(botanicalName, sec);
907 if (isAuct){
908 taxon.setAppendedPhrase("auct.");
909 }
910
911 TaxonNode higherNode;
912 if (botanicalName.isProtectedTitleCache()){
913 logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStrOrig);
914 higherNode = familyNode;
915 }else{
916 String genusStr = botanicalName.getGenusOrUninomial();
917 Taxon genus = state.getHigherTaxon(genusStr);
918 if (genus != null){
919 higherNode = genus.getTaxonNodes().iterator().next();
920 }else{
921 BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
922 name.addSource(makeOriginalSource(state));
923 name.setGenusOrUninomial(genusStr);
924 genus = Taxon.NewInstance(name, sec);
925 genus.addSource(makeOriginalSource(state));
926 higherNode = familyNode.addChildTaxon(genus, null, null);
927 state.putHigherTaxon(genusStr, genus);
928 }
929 }
930 if(isAbsent){
931 botanicalName.setTitleCache(taxonStrOrig, true);
932 taxon.setExcluded(true);
933 }
934
935 higherNode.addChildTaxon(taxon, null, null);
936 taxon.addSource(makeOriginalSource(state));
937
938 return taxon;
939 }
940
941 private final String orthVarRegExStr = "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
942 private final Pattern orthVarRegEx = Pattern.compile(orthVarRegExStr);
943 /**
944 * @param taxonStr
945 * @return
946 */
947 private BotanicalName makeName(CubaImportState state, String nameStrOrig) {
948 //normalize
949 String nameStr = normalizeStatus(nameStrOrig);
950 //orthVar
951 Matcher orthVarMatcher = orthVarRegEx.matcher(nameStr);
952 String orthVar = null;
953 if (orthVarMatcher.matches()) {
954 orthVar = orthVarMatcher.group(1);
955 nameStr = nameStr.replace(" " + orthVar, "").trim().replaceAll("\\s{2,}", " ");
956 orthVar = orthVar.substring(2, orthVar.length() - 2);
957 }
958
959 boolean isNomInval = false;
960 if (nameStr.endsWith("nom. inval.")){
961 isNomInval = true;
962 nameStr = nameStr.replace("nom. inval.", "").trim();
963 }
964
965 BotanicalName result = (BotanicalName)nameParser.parseReferencedName(nameStr, nc, Rank.SPECIES());
966 result.addSource(makeOriginalSource(state));
967 if (isNomInval){
968 result.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.INVALID()));
969 }
970 if (orthVar != null){
971 BotanicalName orthVarName = (BotanicalName)result.clone();
972 orthVarName.addSource(makeOriginalSource(state));
973 //TODO
974 Reference<?> citation = null;
975 orthVarName.addRelationshipToName(result, NameRelationshipType.ORTHOGRAPHIC_VARIANT(), citation, null, null);
976 orthVarName.setSpecificEpithet(orthVar);
977 }
978 normalizeAuthors(result);
979 return result;
980
981 }
982
983 /**
984 * @param result
985 */
986 private void normalizeAuthors(BotanicalName result) {
987 result.setCombinationAuthorship(normalizeAuthor(result.getCombinationAuthorship()));
988 result.setExCombinationAuthorship(normalizeAuthor(result.getExCombinationAuthorship()));
989 result.setExBasionymAuthorship(normalizeAuthor(result.getExBasionymAuthorship()));
990 result.setBasionymAuthorship(normalizeAuthor(result.getBasionymAuthorship()));
991
992 }
993
994
995 /**
996 * @param combinationAuthorship
997 * @return
998 */
999 private TeamOrPersonBase<?> normalizeAuthor(TeamOrPersonBase<?> author) {
1000 if (author == null){
1001 return null;
1002 }
1003 TeamOrPersonBase<?> result;
1004 if (author.isInstanceOf(Person.class)){
1005 result = normalizePerson(CdmBase.deproxy(author, Person.class));
1006 }else{
1007 Team team = CdmBase.deproxy(author, Team.class);
1008 List<Person> list = team.getTeamMembers();
1009 for(int i = 0; i < list.size(); i++){
1010 Person person = list.get(i);
1011 Person tmpMember = normalizePerson(person);
1012 list.set(i, tmpMember);
1013 }
1014 return team;
1015 }
1016 return result;
1017 }
1018
1019
1020 /**
1021 * @param deproxy
1022 * @return
1023 */
1024 private Person normalizePerson(Person person) {
1025 String title = person.getNomenclaturalTitle();
1026 title = title.replaceAll("(?<=[a-zA-Z])\\.(?=[a-zA-Z])", ". ");
1027 person.setNomenclaturalTitle(title);
1028 boolean isFilius = title.endsWith(" f.");
1029 if (isFilius){
1030 title.replace(" f.", "");
1031 }
1032
1033 String[] splits = title.split("\\s+");
1034 int nNotFirstName = isFilius ? 2 : 1;
1035 person.setLastname(splits[splits.length - nNotFirstName] + (isFilius? " f." : ""));
1036 person.setFirstname(CdmUtils.concat(" ", Arrays.copyOfRange(splits, 0, splits.length-nNotFirstName)));
1037 return person;
1038 }
1039
1040
1041 /**
1042 * @param state
1043 * @return
1044 */
1045 private Reference<?> getSecReference(CubaImportState state) {
1046 Reference<?> result = state.getSecReference();
1047 if (result == null){
1048 result = ReferenceFactory.newDatabase();
1049 result.setTitle("Flora of Cuba");
1050 state.setSecReference(result);
1051 }
1052 return result;
1053 }
1054
1055
1056 private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
1057 "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
1058 /**
1059 * @param taxonStr
1060 * @return
1061 */
1062 private String normalizeStatus(String nameStr) {
1063 if (nameStr == null){
1064 return null;
1065 }
1066 String result = nameStr.replaceAll(HOMONYM_MARKER, "").trim();
1067 for (String nomStatusStr : nomStatusStrings){
1068 nomStatusStr = " " + nomStatusStr;
1069 if (result.endsWith(nomStatusStr)){
1070 result = result.replace(nomStatusStr, "," + nomStatusStr);
1071 }
1072 }
1073 result = result.replaceAll(DOUBTFUL_MARKER, "").trim();
1074 result = result.replace("[taxon]", "[infraspec.]");
1075 return result;
1076
1077
1078 }
1079
1080
1081 /**
1082 * @param record
1083 * @param state
1084 * @return
1085 */
1086 private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
1087 String familyStr = getValue(record, "Fam. default");
1088 if (familyStr == null){
1089 return null;
1090 }
1091 familyStr = familyStr.trim();
1092 String alternativeFamilyStr = null;
1093 if (familyStr.contains("/")){
1094 String[] splits = familyStr.split("/");
1095 if (splits.length > 2){
1096 logger.warn(state.getCurrentLine() +": " + "More than 1 alternative name:" + familyStr);
1097 }
1098 familyStr = splits[0].trim();
1099 alternativeFamilyStr = splits[1].trim();
1100 }
1101
1102 Taxon family = state.getHigherTaxon(familyStr);
1103 TaxonNode familyNode;
1104 if (family != null){
1105 familyNode = family.getTaxonNodes().iterator().next();
1106 }else{
1107 BotanicalName name = makeFamilyName(state, familyStr);
1108 Reference<?> sec = getSecReference(state);
1109 family = Taxon.NewInstance(name, sec);
1110 ITaxonTreeNode rootNode = getClassification(state);
1111 familyNode = rootNode.addChildTaxon(family, sec, null);
1112 state.putHigherTaxon(familyStr, family);
1113
1114 }
1115
1116 if (isNotBlank(alternativeFamilyStr)){
1117 NameRelationshipType type = NameRelationshipType.ALTERNATIVE_NAME();
1118 BotanicalName alternativeName = makeFamilyName(state, alternativeFamilyStr);
1119 BotanicalName familyName = (BotanicalName)family.getName();
1120 boolean hasRelation = false;
1121 for (NameRelationship nameRel : familyName.getRelationsToThisName()){
1122 if (nameRel.getType().equals(type)){
1123 if (nameRel.getFromName().equals(alternativeName)){
1124 hasRelation = true;
1125 }
1126 }
1127 }
1128 if (!hasRelation){
1129 familyName.addRelationshipFromName(alternativeName, type, null);
1130 }
1131
1132 }
1133
1134 return familyNode;
1135 }
1136
1137
1138 /**
1139 * @param state
1140 * @param taxon
1141 */
1142 private void validateTaxonIsAbsent(CubaImportState state, Taxon taxon) {
1143 if (!state.isTaxonIsAbsent()){
1144 return;
1145 }
1146
1147 for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
1148 if (el instanceof Distribution){
1149 Distribution dist = (Distribution)el;
1150 NamedArea area = dist.getArea();
1151 if (isCubanArea(area)){
1152 PresenceAbsenceTerm status = dist.getStatus();
1153 if (status != null && !status.isAbsenceTerm()){
1154 if (!isDoubtfulTerm(status)){
1155 String name = taxon.getName().getTitleCache();
1156 logger.error(state.getCurrentLine() +": Taxon ("+name+")is absent'[]' but has presence distribution: " + status.getTitleCache());
1157 return;
1158 }
1159 }
1160 }
1161 }
1162 }
1163 }
1164
1165 /**
1166 * @param state
1167 * @param taxon
1168 */
1169 private void validateEndemic(CubaImportState state, Taxon taxon) {
1170
1171 boolean hasExternalPresence = false;
1172 for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
1173 if (el instanceof Distribution){
1174 Distribution dist = (Distribution)el;
1175 NamedArea area = dist.getArea();
1176 if (!isCubanArea(area)){
1177 PresenceAbsenceTerm status = dist.getStatus();
1178 if (status != null && !status.isAbsenceTerm()){
1179 if (!isDoubtfulTerm(status)){
1180 hasExternalPresence = true;
1181 if (state.isEndemic()){
1182 String name = taxon.getName().getTitleCache();
1183 logger.error(state.getCurrentLine() +": Taxon ("+name+")is endemic but has non-cuban distribution: " + area.getIdInVocabulary() + "-" + status.getIdInVocabulary());
1184 return;
1185 }
1186 }
1187 }
1188 }
1189 }
1190 }
1191 if (!state.isEndemic() && ! hasExternalPresence){
1192 String name = taxon.getName().getTitleCache();
1193 logger.error(state.getCurrentLine() +": Taxon ("+name+")is not endemic but has no non-cuban distribution" );
1194 }
1195 }
1196
1197
1198 /**
1199 * @param state
1200 * @param taxon
1201 * @param famStr
1202 * @param famRef
1203 * @return
1204 */
1205 private Taxon makeAlternativeFamilyTaxon(CubaImportState state, String famStr, Reference<?> famRef) {
1206 String key = famRef.getTitle() + ":"+ famStr;
1207 Taxon family = state.getHigherTaxon(key);
1208 if (family == null){
1209 BotanicalName name = makeFamilyName(state, famStr);
1210 family = Taxon.NewInstance(name, famRef);
1211 state.putHigherTaxon(key, family);
1212 }
1213
1214 return family;
1215 }
1216
1217
1218 /**
1219 * @param state
1220 * @param famStr
1221 * @return
1222 */
1223 private BotanicalName makeFamilyName(CubaImportState state, String famStr) {
1224 BotanicalName name = state.getFamilyName(famStr);
1225 if (name == null){
1226 name = BotanicalName.NewInstance(Rank.FAMILY());
1227 name.setGenusOrUninomial(famStr);
1228 state.putFamilyName(famStr, name);
1229 name.addSource(makeOriginalSource(state));
1230 }
1231 return name;
1232 }
1233
1234
1235 /**
1236 * @param state
1237 * @return
1238 */
1239 private TaxonNode getClassification(CubaImportState state) {
1240 Classification classification = state.getClassification();
1241 if (classification == null){
1242 classification = getClassificationService().find(state.getConfig().getClassificationUuid());
1243 }
1244 TaxonNode rootNode = state.getRootNode();
1245 if (rootNode == null){
1246 rootNode = getTaxonNodeService().find(plantaeUuid);
1247 }
1248 if (rootNode == null){
1249 Reference<?> sec = getSecReference(state);
1250 if (classification == null){
1251 String classificationName = state.getConfig().getClassificationName();
1252 //TODO
1253 Language language = Language.DEFAULT();
1254 classification = Classification.NewInstance(classificationName, sec, language);
1255 state.setClassification(classification);
1256 classification.setUuid(state.getConfig().getClassificationUuid());
1257 classification.getRootNode().setUuid(rootUuid);
1258 }
1259
1260 BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
1261 plantaeName.setGenusOrUninomial("Plantae");
1262 Taxon plantae = Taxon.NewInstance(plantaeName, sec);
1263 TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
1264 plantaeNode.setUuid(plantaeUuid);
1265 state.setRootNode(plantaeNode);
1266 getClassificationService().save(classification);
1267
1268 rootNode = plantaeNode;
1269 }
1270 return rootNode;
1271 }
1272
1273
1274 /**
1275 * @param record
1276 * @param originalKey
1277 * @return
1278 */
1279 private String getValue(HashMap<String, String> record, String originalKey) {
1280 String value = record.get(originalKey);
1281 if (! StringUtils.isBlank(value)) {
1282 if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
1283 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
1284 return value;
1285 }else{
1286 return null;
1287 }
1288 }
1289
1290
1291
1292 /**
1293 * Stores taxa records in DB
1294 */
1295 @Override
1296 protected void firstPass(CubaImportState state) {
1297 boolean isSynonymOnly = false;
1298
1299 String line = state.getCurrentLine() + ": ";
1300 HashMap<String, String> record = state.getOriginalRecord();
1301
1302 Set<String> keys = record.keySet();
1303 for (String key: keys) {
1304 if (! expectedKeys.contains(key)){
1305 logger.warn(line + "Unexpected Key: " + key);
1306 }
1307 }
1308
1309 if (record.get("Fam. default") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
1310 //second header line, don't handle
1311 return;
1312 }
1313
1314 //Fam.
1315 TaxonNode familyTaxon = getFamilyTaxon(record, state);
1316 if (familyTaxon == null){
1317 if (record.get("Taxón") != null){
1318 logger.warn(line + "Family not recognized but taxon exists: " + record.get("Taxón"));
1319 return;
1320 }else if (record.get("Syn.") == null){
1321 logger.warn(line + "Family not recognized but also no synonym exists");
1322 return;
1323 }else{
1324 isSynonymOnly = true;
1325 }
1326 }
1327
1328 //Taxón
1329 Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonymOnly);
1330 if (taxon == null && ! isSynonymOnly){
1331 logger.warn(line + "taxon could not be created and is null");
1332 return;
1333 }
1334 state.setCurrentTaxon(taxon);
1335
1336 //Fam. ALT
1337 if (!isSynonymOnly){
1338 makeAlternativeFamilies(record, state, familyTaxon, taxon);
1339 }
1340
1341 //(Notas)
1342 makeNotes(record, state);
1343
1344 //Syn.
1345 makeSynonyms(record, state, !isSynonymOnly);
1346
1347 //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
1348 makeCubanDistribution(record, state);
1349
1350
1351 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1352 // "CuC","VC","Ci","SS","CA","Cam","LT",
1353 // "CuE","Gr","Ho","SC","Gu",
1354 makeProvincesDistribution(record, state);
1355
1356 // "Esp","Ja","PR","Men","Bah","Cay",
1357 // "AmN","AmC","AmS","VM"});
1358 makeOtherAreasDistribution(record, state);
1359
1360 validateTaxonIsAbsent(state, taxon);
1361 if (!isSynonymOnly){
1362 validateEndemic(state, taxon);
1363 }
1364
1365 state.setHighestStatusForTaxon(null);
1366
1367 return;
1368 }
1369
1370
1371 /**
1372 * @param state
1373 * @return
1374 */
1375 private IdentifiableSource makeOriginalSource(CubaImportState state) {
1376 return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1377 }
1378 /**
1379 * @param state
1380 * @return
1381 */
1382 private DescriptionElementSource makeDescriptionSource(CubaImportState state) {
1383 return DescriptionElementSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1384 }
1385
1386 private static Set<UUID> doubtfulStatus = new HashSet<>();
1387
1388 /**
1389 * @param status
1390 * @return
1391 */
1392 private boolean isDoubtfulTerm(PresenceAbsenceTerm status) {
1393 if (doubtfulStatus.isEmpty()){
1394 doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyNaturalisedUuid);
1395 doubtfulStatus.add(CubaTransformer.doubtfulIndigenousDoubtfulUuid);
1396 doubtfulStatus.add(CubaTransformer.endemicDoubtfullyPresentUuid);
1397 doubtfulStatus.add(CubaTransformer.naturalisedDoubtfullyPresentUuid);
1398 doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyPresentUuid);
1399 doubtfulStatus.add(CubaTransformer.occasionallyCultivatedUuid);
1400 doubtfulStatus.add(CubaTransformer.rareCasualUuid);
1401 doubtfulStatus.add(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE().getUuid());
1402 doubtfulStatus.add(PresenceAbsenceTerm.CULTIVATED_PRESENCE_QUESTIONABLE().getUuid());
1403 }
1404 boolean isDoubtful = doubtfulStatus.contains(status.getUuid());
1405 return isDoubtful;
1406 }
1407
1408
1409 /**
1410 * @param area
1411 * @return
1412 */
1413 private boolean isCubanArea(NamedArea area) {
1414 if (area.getUuid().equals(CubaTransformer.uuidCuba)){
1415 return true;
1416 }else if (area.getPartOf()!= null){
1417 return isCubanArea(area.getPartOf());
1418 }else{
1419 return false;
1420 }
1421 }
1422
1423
1424 /**
1425 * @param record
1426 * @param state
1427 * @param familyTaxon
1428 * @param taxon
1429 */
1430 private void makeAlternativeFamilies(HashMap<String, String> record,
1431 CubaImportState state,
1432 TaxonNode familyTaxon,
1433 Taxon taxon) {
1434
1435 String famFRC = record.get("Fam. FRC");
1436 String famAS = record.get("Fam. A&S");
1437 String famFC = record.get("Fam. FC");
1438
1439 Reference<?> refFRC = makeReference(state, CubaTransformer.uuidRefFRC);
1440 Reference<?> refAS = makeReference(state, CubaTransformer.uuidRefAS);
1441 Reference<?> refFC = makeReference(state, CubaTransformer.uuidRefFC);
1442
1443 makeSingleAlternativeFamily(state, taxon, famFRC, refFRC);
1444 makeSingleAlternativeFamily(state, taxon, famAS, refAS);
1445 makeSingleAlternativeFamily(state, taxon, famFC, refFC);
1446 }
1447
1448
1449 /**
1450 * @param state
1451 * @param uuidreffrc
1452 * @return
1453 */
1454 private Reference<?> makeReference(CubaImportState state, UUID uuidRef) {
1455 Reference<?> ref = state.getReference(uuidRef);
1456 if (ref == null){
1457 ref = getReferenceService().find(uuidRef);
1458 state.putReference(uuidRef, ref);
1459 }
1460 return ref;
1461 }
1462
1463
1464 /**
1465 * @param state
1466 * @param taxon
1467 * @param famString
1468 * @param famRef
1469 */
1470 private void makeSingleAlternativeFamily(CubaImportState state, Taxon taxon, String famStr, Reference<?> famRef) {
1471 if (isBlank(famStr)){
1472 famStr = "-";
1473 // return;
1474 }
1475
1476 TaxonDescription desc = getTaxonDescription(taxon, false, true);
1477
1478 UUID altFamUuid1;
1479 UUID altFamUuid2;
1480 try {
1481 altFamUuid1 = state.getTransformer().getFeatureUuid("Alt.Fam.");
1482 altFamUuid2 = state.getTransformer().getFeatureUuid("Alt.Fam.2");
1483 } catch (UndefinedTransformerMethodException e) {
1484 throw new RuntimeException(e);
1485 }
1486
1487
1488 Taxon famTaxon = makeAlternativeFamilyTaxon(state, famStr, famRef);
1489
1490
1491 //TextData
1492 Feature feature1 = getFeature(state, altFamUuid1, "Families in other Floras (Text)", "Families in other Floras (Text)", "Other floras", null);
1493 feature1.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
1494 // TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
1495 TextData textData = TextData.NewInstance(feature1, null, Language.DEFAULT(), null);
1496 textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null, famTaxon.getName(),null);
1497 desc.addElement(textData);
1498
1499
1500
1501 //TaxonInteraction
1502 Feature feature2 = getFeature(state, altFamUuid2, "Families in other Floras", "Families in other Floras", "Other floras(2)", null);
1503 feature2.setSupportsTaxonInteraction(true);
1504 feature2.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
1505 TaxonInteraction taxInteract = TaxonInteraction.NewInstance(feature2);
1506 textData.putText(Language.SPANISH_CASTILIAN(), "Familias en otras Floras");
1507 taxInteract.setTaxon2(famTaxon);
1508 taxInteract.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null);
1509 desc.addElement(taxInteract);
1510
1511 //Concept Relation
1512 famTaxon.addTaxonRelation(taxon, TaxonRelationshipType.INCLUDES(), taxon.getSec(), null);
1513
1514 }
1515
1516
1517
1518
1519
1520 /**
1521 * @param record
1522 * @param state
1523 * @param taxon
1524 */
1525 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1526 // "CuC","VC","Ci","SS","CA","Cam","LT",
1527 // "CuE","Gr","Ho","SC","Gu",
1528 private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
1529 List<String> areaKeys = Arrays.asList(new String[]{
1530 "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1531 "CuC","VC","Ci","SS","CA","Cam","LT",
1532 "CuE","Gr","Ho","SC","Gu",
1533 });
1534 for (String areaKey : areaKeys){
1535 state.setCubanProvince(true);
1536 makeSingleProvinceDistribution(areaKey, record, state);
1537 }
1538 }
1539
1540 private void makeOtherAreasDistribution(HashMap<String, String> record, CubaImportState state) {
1541 List<String> areaKeys = Arrays.asList(new String[]{
1542 "Esp","Ja","PR","Men","Bah","Cay",
1543 "AmN","AmC","AmS","VM"});
1544 for (String areaKey : areaKeys){
1545 state.setCubanProvince(false);
1546 makeSingleProvinceDistribution(areaKey, record, state);
1547 }
1548 }
1549
1550
1551
1552
1553 /**
1554 * @param areaKey
1555 * @param record
1556 * @param state
1557 * @param highestStatus
1558 * @return
1559 * @throws UndefinedTransformerMethodException
1560 */
1561 private PresenceAbsenceTerm makeProvinceStatus(String areaKey,
1562 HashMap<String, String> record,
1563 CubaImportState state) throws UndefinedTransformerMethodException {
1564
1565 String statusStr = record.get(areaKey);
1566 if (statusStr == null){
1567 return null;
1568 }else{
1569 statusStr = statusStr.trim();
1570 }
1571 PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
1572 if (status == null){
1573 // PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
1574 if (state.isCubanProvince() && isMinus(statusStr)){
1575 // getAbsenceTermForStatus(state, highestStatus);
1576 //we now handle cuban provinces same as external regions
1577 status = state.getTransformer().getPresenceTermByKey("--");
1578 }else if (! state.isCubanProvince() && isMinus(statusStr)){
1579 status = state.getTransformer().getPresenceTermByKey("--");
1580 }else{
1581 // logger.warn("Unhandled status str for provinces / external regions: " + statusStr);
1582 UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
1583 if (statusUuid == null){
1584 logger.error(state.getCurrentLine() + ": Undefined status str for provinces / external regions. No UUID given: '" + statusStr + "'");
1585 }else{
1586 status = getPresenceTerm(state, statusUuid, statusStr, statusStr, statusStr, false);
1587 }
1588 }
1589 }
1590
1591 return status;
1592 }
1593
1594
1595 /**
1596 * @param highestStatus
1597 * @throws UndefinedTransformerMethodException
1598 */
1599 private PresenceAbsenceTerm getAbsenceTermForStatus(CubaImportState state, PresenceAbsenceTerm highestStatus) throws UndefinedTransformerMethodException {
1600 if (highestStatus == null){
1601 logger.warn(state.getCurrentLine() + ": Highest status not defined");
1602 return null;
1603 }
1604 PresenceAbsenceTerm result = null;
1605 if (highestStatus.equals(getStatus(state, "E"))){
1606 result = getStatus(state, "-E");
1607 }else if (highestStatus.getUuid().equals(state.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus.equals(PresenceAbsenceTerm.NATIVE())){
1608 result = getStatus(state, "-Ind.");
1609 }else if (highestStatus.equals(getStatus(state, "Ind.?"))){
1610 result = getStatus(state, "-Ind.?"); //TODO
1611 }else if (highestStatus.equals(getStatus(state, "N"))){
1612 result = getStatus(state, "-N");
1613 }else if (highestStatus.equals(getStatus(state, "P"))){
1614 result = getStatus(state, "-P");
1615 }else if (highestStatus.equals(getStatus(state, "A"))){
1616 result = getStatus(state, "-A");
1617 }else if (highestStatus.equals(getStatus(state, "C"))){
1618 result = getStatus(state, "-C");
1619 }
1620 logger.warn(state.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus.getTitleCache());
1621 return result;
1622 }
1623
1624
1625 /**
1626 * @param string
1627 * @return
1628 * @throws UndefinedTransformerMethodException
1629 */
1630 private PresenceAbsenceTerm getStatus(CubaImportState state, String key) throws UndefinedTransformerMethodException {
1631 PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(key);
1632 if (status == null){
1633 UUID statusUuid = state.getTransformer().getPresenceTermUuid(key);
1634 status = getPresenceTerm(state, statusUuid, null, null, null, false);
1635 }
1636 return status;
1637 }
1638
1639
1640 /**
1641 * Stores parent-child, synonym and common name relationships
1642 */
1643 @Override
1644 protected void secondPass(CubaImportState state) {
1645 // CyprusRow cyprusRow = state.getCyprusRow();
1646 return;
1647 }
1648
1649
1650 @Override
1651 protected boolean isIgnore(CubaImportState state) {
1652 return ! state.getConfig().isDoTaxa();
1653 }
1654
1655 @Override
1656 protected boolean doCheck(CubaImportState state) {
1657 logger.warn("DoCheck not yet implemented for CubaExcelImport");
1658 return true;
1659 }
1660
1661 }