2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.cuba
;
12 import java
.util
.ArrayList
;
13 import java
.util
.Arrays
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
16 import java
.util
.List
;
18 import java
.util
.UUID
;
19 import java
.util
.regex
.Matcher
;
20 import java
.util
.regex
.Pattern
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.springframework
.stereotype
.Component
;
26 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
27 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
28 import eu
.etaxonomy
.cdm
.io
.excel
.common
.ExcelImporterBase
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
30 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
31 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
32 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
33 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
34 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
35 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
37 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
38 import eu
.etaxonomy
.cdm
.model
.common
.Representation
;
39 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
40 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementSource
;
41 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
42 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
43 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
44 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
45 import eu
.etaxonomy
.cdm
.model
.description
.TaxonInteraction
;
46 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
47 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
48 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
49 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
50 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationship
;
51 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationshipType
;
52 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
53 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
54 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
55 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
56 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
57 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
58 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
59 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
60 import eu
.etaxonomy
.cdm
.model
.taxon
.ITaxonTreeNode
;
61 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationship
;
62 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
63 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
64 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
65 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationshipType
;
66 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
67 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
75 public class CubaExcelImport
extends ExcelImporterBase
<CubaImportState
> {
76 private static final long serialVersionUID
= -747486709409732371L;
77 private static final Logger logger
= Logger
.getLogger(CubaExcelImport
.class);
79 private static final String HOMONYM_MARKER
= "\\s+homon.?$";
80 private static final String DOUBTFUL_MARKER
= "^\\?\\s?";
83 private static UUID rootUuid
= UUID
.fromString("206d42e4-ac32-4f20-a093-14826014e667");
84 private static UUID plantaeUuid
= UUID
.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
86 private static INonViralNameParser
<?
> nameParser
= NonViralNameParserImpl
.NewInstance();
87 private static NomenclaturalCode nc
= NomenclaturalCode
.ICNAFP
;
89 private static List
<String
> expectedKeys
= Arrays
.asList(new String
[]{
90 "Fam. default","Fam. FRC","Fam. A&S","Fam. FC",
91 "Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
94 protected void analyzeRecord(HashMap
<String
, String
> record
, CubaImportState state
) {
95 //we do everything in firstPass here
105 private void makeCubanDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
107 NamedArea cuba
= getNamedArea(state
, state
.getTransformer().getNamedAreaUuid("Cu"), null, null, null, null, null);
108 TaxonDescription desc
= getTaxonDescription(state
.getCurrentTaxon(), false, true);
109 List
<PresenceAbsenceTerm
> statuss
= makeCubanStatuss(record
, state
);
110 for (PresenceAbsenceTerm status
: statuss
){
111 Distribution distribution
= Distribution
.NewInstance(cuba
, status
);
112 desc
.addElement(distribution
);
113 distribution
.addSource(makeDescriptionSource(state
));
115 } catch (UndefinedTransformerMethodException e
) {
125 * @throws UndefinedTransformerMethodException
127 private List
<PresenceAbsenceTerm
> makeCubanStatuss(HashMap
<String
, String
> record
, CubaImportState state
) throws UndefinedTransformerMethodException
{
128 PresenceAbsenceTerm highestStatus
= null;
130 String line
= state
.getCurrentLine() + ": ";
131 List
<PresenceAbsenceTerm
> result
= new ArrayList
<>();
133 String endemicStr
= getValue(record
, "End");
134 String indigenousStr
= getValue(record
, "Ind");
135 String indigenousDoubtStr
= getValue(record
, "Ind? D");
136 String naturalisedStr
= getValue(record
, "Nat");
137 String dudStr
= getValue(record
, "Dud P");
138 String advStr
= getValue(record
, "Adv");
139 String cultStr
= getValue(record
, "Cult C");
141 state
.setEndemic(false);
143 if (endemicStr
!= null){
144 if(endemicStr
.equals("+")){
145 PresenceAbsenceTerm endemicState
= state
.getTransformer().getPresenceTermByKey("E");
146 result
.add(endemicState
);
147 highestStatus
= endemicState
;
148 state
.setEndemic(true);
149 }else if(isMinus(endemicStr
)){
150 UUID endemicUuid
= state
.getTransformer().getPresenceTermUuid("-E");
151 PresenceAbsenceTerm endemicState
= getPresenceTerm(state
, endemicUuid
, null, null, null, false);
152 result
.add(endemicState
);
153 checkAbsentHighestState(highestStatus
, line
, "endemic", false);
154 }else if(endemicStr
.equals("?")){
155 UUID endemicDoubtfulUuid
= state
.getTransformer().getPresenceTermUuid("?E");
156 PresenceAbsenceTerm endemicState
= getPresenceTerm(state
, endemicDoubtfulUuid
, null, null, null, false);
157 result
.add(endemicState
);
158 checkAbsentHighestState(highestStatus
, line
, "endemic", false);
160 logger
.warn(line
+ "Endemic not recognized: " + endemicStr
);
163 if (indigenousStr
!= null){
164 if(indigenousStr
.equals("+")){
165 PresenceAbsenceTerm indigenousState
= state
.getTransformer().getPresenceTermByKey("Ind.");
166 // PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
167 result
.add(indigenousState
);
168 highestStatus
= highestStatus
!= null ? highestStatus
: indigenousState
;
169 }else if(isMinus(indigenousStr
)){
170 PresenceAbsenceTerm indigenousState
= state
.getTransformer().getPresenceTermByKey("-Ind.");
171 result
.add(indigenousState
);
172 checkAbsentHighestState(highestStatus
, line
, "indigenous", false);
173 }else if(indigenousStr
.equals("?")){
174 PresenceAbsenceTerm indigenousDoubtState
= state
.getTransformer().getPresenceTermByKey("?Ind.");
175 // PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
176 result
.add(indigenousDoubtState
);
177 checkAbsentHighestState(highestStatus
, line
, "indigenous", true);
179 logger
.warn(line
+ "Indigenous not recognized: " + indigenousStr
);
182 if(indigenousDoubtStr
!= null){
183 if(indigenousDoubtStr
.equals("D")){
184 PresenceAbsenceTerm doubtIndigenousState
= state
.getTransformer().getPresenceTermByKey("Ind.?");
185 // PresenceAbsenceTerm doubtIndigenousState = getPresenceTerm(state, doubtIndigenousUuid, null, null, null, false);
186 result
.add(doubtIndigenousState
);
187 highestStatus
= highestStatus
!= null ? highestStatus
: doubtIndigenousState
;
188 }else if(isMinus(indigenousDoubtStr
)){
189 UUID doubtIndigenousErrorUuid
= state
.getTransformer().getPresenceTermUuid("-Ind.?");
190 PresenceAbsenceTerm doubtIndigenousErrorState
= getPresenceTerm(state
, doubtIndigenousErrorUuid
, null, null, null, false);
191 result
.add(doubtIndigenousErrorState
);
192 checkAbsentHighestState(highestStatus
, line
, "doubtfully indigenous", true);
194 logger
.warn(line
+ "doubtfully indigenous not recognized: " + indigenousDoubtStr
);
197 if(naturalisedStr
!= null){
198 if(naturalisedStr
.equals("N")){
199 PresenceAbsenceTerm haturalizedState
= state
.getTransformer().getPresenceTermByKey("Nat.");
200 result
.add(haturalizedState
);
201 highestStatus
= highestStatus
!= null ? highestStatus
: haturalizedState
;
202 }else if(isMinus(naturalisedStr
)){
203 UUID naturalisedErrorUuid
= state
.getTransformer().getPresenceTermUuid("-Nat.");
204 PresenceAbsenceTerm naturalisedErrorState
= getPresenceTerm(state
, naturalisedErrorUuid
, null, null, null, false);
205 result
.add(naturalisedErrorState
);
206 checkAbsentHighestState(highestStatus
, line
, "naturalized", false);
207 }else if(naturalisedStr
.equals("?")){
208 UUID naturalisedDoubtUuid
= state
.getTransformer().getPresenceTermUuid("?Nat.");
209 PresenceAbsenceTerm naturalisedDoubtState
= getPresenceTerm(state
, naturalisedDoubtUuid
, null, null, null, false);
210 result
.add(naturalisedDoubtState
);
211 checkAbsentHighestState(highestStatus
, line
, "naturalized", true);
213 logger
.warn(line
+ "Naturalized not recognized: " + naturalisedStr
);
217 if(dudStr
.equals("P")){
218 UUID dudUuid
= state
.getTransformer().getPresenceTermUuid("Dud.");
219 PresenceAbsenceTerm dudState
= getPresenceTerm(state
, dudUuid
, null, null, null, false);
220 result
.add(dudState
);
221 highestStatus
= highestStatus
!= null ? highestStatus
: dudState
;
222 }else if(isMinus(dudStr
)){
223 UUID nonNativeErrorUuid
= state
.getTransformer().getPresenceTermUuid("-Dud.");
224 PresenceAbsenceTerm nonNativeErrorState
= getPresenceTerm(state
, nonNativeErrorUuid
, null, null, null, false);
225 result
.add(nonNativeErrorState
);
226 checkAbsentHighestState(highestStatus
, line
, "non-native and doubtfully naturalised", false);
227 }else if(dudStr
.equals("?")){
228 UUID naturalisedDoubtUuid
= state
.getTransformer().getPresenceTermUuid("?Dud.");
229 PresenceAbsenceTerm naturalisedDoubtState
= getPresenceTerm(state
, naturalisedDoubtUuid
, null, null, null, false);
230 result
.add(naturalisedDoubtState
);
231 checkAbsentHighestState(highestStatus
, line
, "non-native and doubtfully naturalised", true);
233 logger
.warn(line
+ "non-native and doubtfully naturalised not recognized: " + dudStr
);
237 if(advStr
.equals("A")){
238 PresenceAbsenceTerm advState
= state
.getTransformer().getPresenceTermByKey("Adv.");
239 // PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
240 result
.add(advState
);
241 highestStatus
= highestStatus
!= null ? highestStatus
: advState
;
242 }else if(isMinus(advStr
)){
243 UUID advUuid
= state
.getTransformer().getPresenceTermUuid("-Adv.");
244 PresenceAbsenceTerm advState
= getPresenceTerm(state
, advUuid
, null, null, null, false);
245 result
.add(advState
);
246 checkAbsentHighestState(highestStatus
, line
, "adventive", false);
247 }else if(advStr
.equals("(A)")){
248 UUID rareCasualUuid
= state
.getTransformer().getPresenceTermUuid("(A)");
249 PresenceAbsenceTerm rareCasual
= getPresenceTerm(state
, rareCasualUuid
, null, null, null, false);
250 result
.add(rareCasual
);
252 logger
.warn(line
+ "'adventive (casual) alien' not recognized: " + advStr
);
254 }else if(cultStr
!= null){
255 if (! (cultStr
.matches("(C|\\(C\\)|\\?|–)"))){
256 logger
.warn("'cultivated' not recognized: " + cultStr
);
257 }else if(cultStr
.equals("C")){
258 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("Cult.");
259 result
.add(cultivatedState
);
260 highestStatus
= highestStatus
!= null ? highestStatus
: cultivatedState
;
261 }else if(cultStr
.equals("?")){
262 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("?Cult.");
263 result
.add(cultivatedState
);
264 checkAbsentHighestState(highestStatus
, line
, "cultivated", true);
265 }else if(cultStr
.equals("(C)")){
266 UUID ocassualCultUuid
= state
.getTransformer().getPresenceTermUuid("(C)");
267 PresenceAbsenceTerm cultivatedState
= getPresenceTerm(state
, ocassualCultUuid
, null, null, null, false);
268 result
.add(cultivatedState
);
269 }else if(isMinus(cultStr
)){
270 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("-Cult.");
271 result
.add(cultivatedState
);
272 checkAbsentHighestState(highestStatus
, line
, "cultivated", false);
274 logger
.warn(line
+ "'cultivated' not recognized: " + cultStr
);
277 state
.setHighestStatusForTaxon(highestStatus
);
283 * @param highestStatus
286 private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus
, String line
, String stateLabel
, boolean doubtful
) {
287 //can be removed, highest status is not used anymore
288 if (highestStatus
== null){
289 String absentStr
= doubtful ?
"doubtful" : "absent";
290 logger
.info(line
+ "Highest cuban state is " + absentStr
+ " " + stateLabel
);
297 * @param indigenousStr
300 private boolean isMinus(String str
) {
301 return str
.equals("-") || str
.equals("–") || str
.equals("‒");
306 * @param indigenousStr
309 private boolean checkPlusMinusDoubt(String str
) {
310 return str
.equals("+") || isMinus(str
)|| str
.equals("?");
315 * @param indigenousStr
316 * @param indigenousDoubtStr
317 * @param naturalisedStr
322 private boolean checkAllNull(String
... others
) {
323 for (String other
: others
){
332 private static final String acceptedRegExStr
= "\\(([^\\[\\]“”]{6,})\\)";
333 // String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
334 // + "(\\((.{6,})\\))?";
335 private static final String heterotypicRegExStr
= "([^\\(\\[\\]“”]{5,})"
337 private static final String heterotypicRegExStr_TEST
= "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
339 private static final String auctRegExStr
= "auct\\."
340 +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.(\\sFC-S|\\sA&S)?|\\sWright"
341 + "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
342 + "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
345 private static final String missapliedRegExStr
= "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr
+ "|sensu\\s+.{2,})";
346 private static final String sphalmRegExStr
= "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
347 private static final String nomInvalRegExStr
= "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
348 private static final String homonymRegExStr
= "\\s*(\\[.*\\])*\\s*";
350 private static final Pattern acceptedRegEx
= Pattern
.compile(acceptedRegExStr
+ homonymRegExStr
);
351 private static final Pattern heterotypicRegEx
= Pattern
.compile(heterotypicRegExStr
+ homonymRegExStr
);
352 private static final Pattern missapliedRegEx
= Pattern
.compile(missapliedRegExStr
);
353 private static final Pattern nomInvalRegEx
= Pattern
.compile(nomInvalRegExStr
);
354 private static final Pattern sphalmRegEx
= Pattern
.compile(sphalmRegExStr
);
361 private void makeSynonyms(HashMap
<String
, String
> record
, CubaImportState state
, boolean isFirstSynonym
) {
362 // boolean forAccepted = true;
363 String synonymStr
= record
.get("Syn.");
364 String line
= state
.getCurrentLine() + ": ";
367 if (synonymStr
== null){
368 //TODO test that this is not a synonym only line
372 if (state
.getCurrentTaxon() == null){
373 logger
.error(line
+ "Current taxon is null for synonym");
378 synonymStr
= synonymStr
.trim();
379 synonymStr
= synonymStr
.replace("[taxon]", "[infraspec.]");
381 // String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
382 // String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
384 // Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
387 Matcher missapliedMatcher
= missapliedRegEx
.matcher(synonymStr
);
388 Matcher nomInvalMatcher
= nomInvalRegEx
.matcher(synonymStr
);
389 Matcher acceptedMatcher
= acceptedRegEx
.matcher(synonymStr
);
390 Matcher heterotypicMatcher
= heterotypicRegEx
.matcher(synonymStr
);
391 Matcher sphalmMatcher
= sphalmRegEx
.matcher(synonymStr
);
393 List
<BotanicalName
> homonyms
= new ArrayList
<>();
394 if (missapliedMatcher
.matches()){
395 boolean doubtful
= missapliedMatcher
.group(1) != null;
396 String firstPart
= missapliedMatcher
.group(2);
397 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
398 name
.addSource(makeOriginalSource(state
));
400 String secondPart
= missapliedMatcher
.group(3);
401 Taxon misappliedNameTaxon
= Taxon
.NewInstance(name
, null);
402 misappliedNameTaxon
.addSource(makeOriginalSource(state
));
403 misappliedNameTaxon
.setDoubtful(doubtful
);
404 if (secondPart
.startsWith("sensu")){
405 secondPart
= secondPart
.substring(5).trim();
406 if (secondPart
.contains(" ")){
407 logger
.warn(line
+ "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart
);
409 Reference
<?
> sensu
= ReferenceFactory
.newGeneric();
410 Team team
= Team
.NewTitledInstance(secondPart
, null);
411 sensu
.setAuthorship(team
);
412 misappliedNameTaxon
.setSec(sensu
);
413 }else if (secondPart
.matches(auctRegExStr
)){
414 secondPart
= secondPart
.replace("p. p.", "p.p.");
415 misappliedNameTaxon
.setAppendedPhrase(secondPart
);
417 logger
.warn(line
+ "Misapplied second part not recognized: " + secondPart
);
420 Reference
<?
> relRef
= null;
421 state
.getCurrentTaxon().addMisappliedName(misappliedNameTaxon
, relRef
, null);
422 }else if (nomInvalMatcher
.matches()){
423 String firstPart
= nomInvalMatcher
.group(1);
424 String afterInval
= nomInvalMatcher
.group(2);
425 if (StringUtils
.isNotBlank(afterInval
)){
426 logger
.warn(state
.getCurrentLine() + ": After inval to be implemented: " + afterInval
);
428 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
429 name
.addSource(makeOriginalSource(state
));
430 NomenclaturalStatus status
= NomenclaturalStatus
.NewInstance( NomenclaturalStatusType
.INVALID());
431 name
.addStatus(status
);
432 SynonymRelationship sr
= state
.getCurrentTaxon().addSynonymName(name
, SynonymRelationshipType
.SYNONYM_OF());
433 sr
.getSynonym().addSource(makeOriginalSource(state
));
434 }else if (sphalmMatcher
.matches()){
435 String firstPart
= sphalmMatcher
.group(1);
436 String sphalmPart
= synonymStr
.replace(firstPart
, "").replace("“","").replace("”","").trim();
437 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
438 // NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
439 // name.addStatus(status);
440 name
.addSource(makeOriginalSource(state
));
441 SynonymRelationship sr
= state
.getCurrentTaxon().addSynonymName(name
, SynonymRelationshipType
.SYNONYM_OF());
442 sr
.getSynonym().setAppendedPhrase(sphalmPart
);
443 sr
.getSynonym().setSec(null);
444 sr
.getSynonym().addSource(makeOriginalSource(state
));
445 }else if (acceptedMatcher
.matches()){
446 String firstPart
= acceptedMatcher
.group(1);
447 String homonymPart
= acceptedMatcher
.groupCount() < 2 ?
null : acceptedMatcher
.group(2);
448 List
<BotanicalName
> list
= handleHomotypicGroup(firstPart
, state
, (BotanicalName
)state
.getCurrentTaxon().getName(), false, homonyms
, homonymPart
, false);
449 checkFirstSynonym(state
, list
, isFirstSynonym
, synonymStr
, false);
450 }else if(heterotypicMatcher
.matches()){
451 String firstPart
= heterotypicMatcher
.group(1).trim();
452 String secondPart
= heterotypicMatcher
.groupCount() < 3 ?
null : heterotypicMatcher
.group(3);
453 String homonymPart
= heterotypicMatcher
.groupCount() < 4 ?
null : heterotypicMatcher
.group(4);
454 boolean isDoubtful
= firstPart
.matches("^\\?\\s*.*");
455 firstPart
= replaceHomonIlleg(firstPart
);
456 boolean isHomonym
= firstPart
.matches(".*" + HOMONYM_MARKER
);
457 BotanicalName synName
= makeName(state
, firstPart
);
458 if (synName
.isProtectedTitleCache()){
459 logger
.warn(line
+ "Heterotypic base synonym could not be parsed correctly: " + firstPart
);
462 homonyms
.add(synName
);
464 SynonymRelationship sr
= state
.getCurrentTaxon().addHeterotypicSynonymName(synName
);
465 sr
.getSynonym().setDoubtful(isDoubtful
);
466 sr
.getSynonym().addSource(makeOriginalSource(state
));
467 List
<BotanicalName
> list
= handleHomotypicGroup(secondPart
, state
, synName
, true, homonyms
, homonymPart
, isDoubtful
);
468 checkFirstSynonym(state
, list
, isFirstSynonym
, synonymStr
, true);
470 }else if (isSpecialHeterotypic(synonymStr
)){
471 BotanicalName synName
= makeName(state
, synonymStr
);
472 if (synName
.isProtectedTitleCache()){
473 logger
.warn(line
+ "Special heterotypic synonym could not be parsed correctly:" + synonymStr
);
475 SynonymRelationship sr
= state
.getCurrentTaxon().addHeterotypicSynonymName(synName
);
476 sr
.getSynonym().addSource(makeOriginalSource(state
));
478 logger
.warn(line
+ "Synonym entry does not match: " + synonymStr
);
485 * @param isFirstSynonym
489 private void checkFirstSynonym(CubaImportState state
, List
<BotanicalName
> list
, boolean isFirstSynonym
, String synonymStr
, boolean isHeterotypicMatcher
) {
490 if (!isFirstSynonym
){
493 String line
= state
.getCurrentLine() + ": ";
494 BotanicalName currentName
= isHeterotypicMatcher?
(BotanicalName
)state
.getCurrentTaxon().getName(): list
.get(0);
495 boolean currentHasBasionym
= currentName
.getBasionymAuthorship() != null;
496 BotanicalName firstSynonym
= isHeterotypicMatcher ? list
.get(0): list
.get(1);
497 // if (list.size() <= 1){
498 // logger.error(line + "homotypic list size is 1 but shouldn't");
501 if (isHeterotypicMatcher
&& currentHasBasionym
){
502 logger
.error(line
+ "Current taxon (" + currentName
.getTitleCache() + ") has basionym author but has no homotypic basionym , but : " + synonymStr
);
503 }else if (isHeterotypicMatcher
){
504 //first synonym must not have a basionym author
505 if (firstSynonym
.getBasionymAuthorship() != null){
506 logger
.error(line
+ "Current taxon (" + currentName
.getTitleCache() + ") has no basionym but first synonym requires basionym : " + synonymStr
);
508 }else{ //isAcceptedMatcher
509 if (currentHasBasionym
){
510 if (! matchAuthor(currentName
.getBasionymAuthorship(), firstSynonym
.getCombinationAuthorship())){
511 logger
.info(line
+ "Current basionym author and first synonym combination author do not match: " + currentName
.getTitleCache() + "<->" + firstSynonym
.getTitleCache());
514 if (! matchAuthor(currentName
.getCombinationAuthorship(), firstSynonym
.getBasionymAuthorship())){
515 logger
.info(line
+ "Current combination author and first synonym basionym author do not match: " + currentName
.getTitleCache() + "<->" + firstSynonym
.getTitleCache());
527 private boolean isSpecialHeterotypic(String synonymStr
) {
528 if (synonymStr
== null){
530 }else if (synonymStr
.equals("Rhynchospora prenleloupiana (‘prenteloupiana’) Boeckeler")){
532 }else if (synonymStr
.equals("Psidium longipes var. orbiculare (O.Berg) McVaugh")){
545 private void makeSingleProvinceDistribution(String areaKey
,
546 HashMap
<String
, String
> record
,
547 CubaImportState state
) {
549 UUID areaUuid
= state
.getTransformer().getNamedAreaUuid(areaKey
);
550 if (areaUuid
== null){
551 logger
.warn("Area not recognized: " + areaKey
);
554 if (record
.get(areaKey
)==null){
555 return; //no status defined
558 NamedArea area
= getNamedArea(state
, areaUuid
, null, null, null, null, null);
560 logger
.warn(state
.getCurrentLine() + ": Area not recognized: " + area
);
562 TaxonDescription desc
= getTaxonDescription(state
.getCurrentTaxon(), false, true);
563 PresenceAbsenceTerm status
= makeProvinceStatus(areaKey
, record
, state
);
565 logger
.warn(state
.getCurrentLine() + ": Province distribution status could not be defined: " + record
.get(areaKey
));
567 Distribution distribution
= Distribution
.NewInstance(area
, status
);
568 desc
.addElement(distribution
);
569 distribution
.addSource(makeDescriptionSource(state
));
570 } catch (UndefinedTransformerMethodException e
) {
584 * @param homotypicalGroup
586 private List
<BotanicalName
> handleHomotypicGroup(String homotypicStrOrig
,
587 CubaImportState state
,
588 BotanicalName homotypicName
,
589 boolean isHeterotypic
,
590 List
<BotanicalName
> homonyms
,
592 boolean isDoubtful
) {
594 List
<BotanicalName
> homotypicNameList
= new ArrayList
<>();
595 homotypicNameList
.add(homotypicName
);
597 String homotypicStr
= homotypicStrOrig
;
598 if (homotypicStr
== null){
599 return homotypicNameList
;
600 }else if (homotypicStr
.startsWith("(") && homotypicStr
.endsWith("")){
601 homotypicStr
= homotypicStr
.substring(1, homotypicStr
.length() - 1);
604 HomotypicalGroup homotypicGroup
= homotypicName
.getHomotypicalGroup();
605 String
[] splits
= homotypicStr
.split("\\s*,\\s*");
606 for (String split
: splits
){
607 split
= replaceHomonIlleg(split
);
608 boolean isHomonym
= split
.matches(".*" + HOMONYM_MARKER
);
609 BotanicalName newName
= makeName(state
, split
);
610 newName
.setHomotypicalGroup(homotypicGroup
); //not really necessary as this is later set anyway
611 if (newName
.isProtectedTitleCache()){
612 logger
.warn(state
.getCurrentLine() + ": homotypic name part could not be parsed: " + split
);
615 homonyms
.add(newName
);
618 SynonymRelationship sr
= state
.getCurrentTaxon().addHeterotypicSynonymName(newName
, homotypicGroup
, null, null);
619 sr
.getSynonym().setDoubtful(isDoubtful
);
620 sr
.getSynonym().addSource(makeOriginalSource(state
));
621 // newName.addBasionym(homotypicName);
623 state
.getCurrentTaxon().addHomotypicSynonymName(newName
, null, null);
625 handleBasionym(state
, homotypicNameList
, homonyms
, newName
);
626 homotypicNameList
.add(newName
);
628 makeHomonyms(homonyms
, homonymPart
, state
, homotypicGroup
);
629 return homotypicNameList
;
637 private String
replaceHomonIlleg(String split
) {
638 String result
= split
.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
647 * @param currentBasionym
649 private void makeHomonyms(List
<BotanicalName
> homonyms
, String homonymPartOrig
, CubaImportState state
,
650 HomotypicalGroup homotypicGroup
) {
651 String line
= state
.getCurrentLine() + ": ";
652 String homonymPart
= homonymPartOrig
== null ?
"" : homonymPartOrig
.trim();
653 if (homonyms
.isEmpty() && homonymPart
.equals("")){
655 }else if (homonymPart
.equals("")){
656 logger
.warn(line
+ "SynonymPart has homonyms but homonymPart is empty");
659 homonymPart
= homonymPart
.substring(1, homonymPart
.length() - 1);
660 String
[] splits
= homonymPart
.split("\\]\\s*\\[");
661 if (splits
.length
!= homonyms
.size()){
662 if(homonyms
.size() == 0 && splits
.length
>= 1){
663 handleSimpleBlockingNames(splits
, state
, homotypicGroup
);
665 logger
.warn(line
+ "Number of homonyms (" + homonyms
.size() + ") and homonymParts ("+splits
.length
+") does not match");
670 for (String split
: splits
){
671 split
= split
.replaceAll("^non\\s+", "");
672 BotanicalName newName
= makeName(state
, split
);
673 // BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
674 if (newName
.isProtectedTitleCache()){
675 logger
.warn(state
.getCurrentLine() + ": homonym name could not be parsed: " + split
);
677 homonyms
.get(i
).addRelationshipToName(newName
, NameRelationshipType
.LATER_HOMONYM(), null);
685 * @param homotypicGroup
687 private void handleSimpleBlockingNames(String
[] splitsi
,
688 CubaImportState state
,
689 HomotypicalGroup homotypicGroup
) {
690 List
<BotanicalName
> replacementNameCandidates
= new ArrayList
<>();
691 for (String spliti
: splitsi
){
693 String split
= spliti
.replaceAll("^non\\s+", "");
694 BotanicalName newName
= makeName(state
, split
);
695 if (newName
.isProtectedTitleCache()){
696 logger
.warn(state
.getCurrentLine() + ": blocking name could not be parsed: " + split
);
698 Set
<BotanicalName
> typifiedNames
= (Set
)homotypicGroup
.getTypifiedNames();
699 Set
<BotanicalName
> candidates
= new HashSet
<>();
700 for (BotanicalName name
: typifiedNames
){
701 if (name
.getGenusOrUninomial() != null && name
.getGenusOrUninomial().equals(newName
.getGenusOrUninomial())){
702 if (name
.getStatus().isEmpty() || ! name
.getStatus().iterator().next().getType().equals(NomenclaturalStatusType
.ILLEGITIMATE())){
703 candidates
.add(name
);
707 if (candidates
.size() == 1){
708 BotanicalName blockedName
= candidates
.iterator().next();
709 newName
.addRelationshipToName(blockedName
, NameRelationshipType
.BLOCKING_NAME_FOR(), null);
710 replacementNameCandidates
.add(blockedName
);
712 logger
.warn(state
.getCurrentLine() + ": Blocking name could not be handled. " + candidates
.size() + " candidates.");
715 makeReplacedSynonymIfPossible(state
, homotypicGroup
, replacementNameCandidates
);
719 * @param homotypicGroup
720 * @param replacementNameCandidates
722 private void makeReplacedSynonymIfPossible(CubaImportState state
,
723 HomotypicalGroup homotypicGroup
,
724 List
<BotanicalName
> replacementNameCandidates
) {
725 String line
= state
.getCurrentLine() +": ";
726 List
<BotanicalName
> replacedCandidates
= new ArrayList
<>();
727 for (TaxonNameBase
<?
, ?
> typifiedName
: homotypicGroup
.getTypifiedNames()){
728 BotanicalName candidate
= (BotanicalName
)typifiedName
;
729 if (candidate
.getBasionymAuthorship() == null){
730 if (candidate
.getStatus().isEmpty()){
731 if (! replacementNameCandidates
.contains(candidate
)){
732 replacedCandidates
.add(candidate
);
737 if (replacedCandidates
.size() == 1){
738 BotanicalName replacedSynonym
= replacedCandidates
.iterator().next();
739 for (BotanicalName replacementName
: replacementNameCandidates
){
740 replacementName
.addReplacedSynonym(replacedSynonym
, null, null, null);
742 }else if (replacedCandidates
.size() < 1){
743 logger
.warn(line
+ "No replaced synonym candidate found");
745 logger
.warn(line
+ "More than 1 ("+replacedCandidates
.size()+") replaced synonym candidates found");
751 * @param homotypicGroup
754 private void handleBasionym(CubaImportState state
, List
<BotanicalName
> homotypicNameList
,
755 List
<BotanicalName
> homonyms
, BotanicalName newName
) {
756 for (BotanicalName existingName
: homotypicNameList
){
757 if (existingName
!= newName
){ //should not happen anymore, as new name is added later
758 boolean onlyIfNotYetExists
= true;
759 createBasionymRelationIfPossible(state
, existingName
, newName
, homonyms
.contains(newName
), onlyIfNotYetExists
);
770 private void createBasionymRelationIfPossible(CubaImportState state
, BotanicalName name1
, BotanicalName name2
,
771 boolean name2isHomonym
, boolean onlyIfNotYetExists
) {
772 BotanicalName basionymName
= name1
;
773 BotanicalName newCombination
= name2
;
774 //exactly one name must have a basionym author
775 if (name1
.getBasionymAuthorship() == null && name2
.getBasionymAuthorship() == null
776 || name1
.getBasionymAuthorship() != null && name2
.getBasionymAuthorship() != null){
780 //switch order if necessary
781 if (! name2isHomonym
&& basionymName
.getBasionymAuthorship() != null && newCombination
.getBasionymAuthorship() == null){
782 basionymName
= name2
;
783 newCombination
= name1
;
785 if (matchAuthor(basionymName
.getCombinationAuthorship(), newCombination
.getBasionymAuthorship())
786 && matchLastNamePart(basionymName
, newCombination
)){
787 newCombination
.addBasionym(basionymName
);
789 if ( (newCombination
.getBasionyms().isEmpty() || ! onlyIfNotYetExists
)
790 && isLegitimate(basionymName
)
791 && ! name2isHomonym
){
792 logger
.info(state
.getCurrentLine() + ": Names are potential basionyms but either author or name part do not match: " + basionymName
.getTitleCache() + " <-> " + newCombination
.getTitleCache());
798 * @param basionymName
801 private boolean isLegitimate(BotanicalName basionymName
) {
802 for (NomenclaturalStatus nomStatus
: basionymName
.getStatus()){
803 if (nomStatus
.getType()!= null && nomStatus
.getType().isIllegitimateType()){
807 for (NameRelationship nameRel
: basionymName
.getNameRelations()){
808 if (nameRel
.getType()!= null && nameRel
.getType().isIllegitimateType()){
817 * @param basionymName
818 * @param newCombination
821 private boolean matchLastNamePart(BotanicalName name1
, BotanicalName name2
) {
822 String lastNamePart1
= name1
.getLastNamePart();
823 String lastNamePart2
= name2
.getLastNamePart();
824 if (lastNamePart1
!= null && lastNamePart2
!= null){
825 lastNamePart1
= normalizeBasionymNamePart(lastNamePart1
);
826 lastNamePart2
= normalizeBasionymNamePart(lastNamePart2
);
827 return (lastNamePart1
.equals(lastNamePart2
));
834 * @param lastNamePart1
837 private String
normalizeBasionymNamePart(String lastNamePart
) {
838 String namePart
= lastNamePart
.toLowerCase()
839 .replaceAll("(um|us|a|is|e|os|on|or)$", "")
840 .replaceAll("er$", "r") //e.g. ruber <-> rubra
841 .replaceAll("ese$", "s"); //e.g. cayanensis <-> cayanenese
842 //TODO tampensis / tampense
848 * @param combinationAuthorship
852 private boolean matchAuthor(TeamOrPersonBase
<?
> author1
, TeamOrPersonBase
<?
> author2
) {
853 if (author1
== null || author2
== null){
856 return author1
.getNomenclaturalTitle().equals(author2
.getNomenclaturalTitle());
866 private void makeNotes(HashMap
<String
, String
> record
, CubaImportState state
) {
867 String notesStr
= getValue(record
, "(Notas)");
868 if (notesStr
== null){
871 Annotation annotation
= Annotation
.NewDefaultLanguageInstance(notesStr
);
873 annotation
.setAnnotationType(AnnotationType
.TECHNICAL());
874 state
.getCurrentTaxon().addAnnotation(annotation
);
885 private Taxon
makeTaxon(HashMap
<String
, String
> record
, CubaImportState state
, TaxonNode familyNode
, boolean isSynonym
) {
886 String taxonStrOrig
= getValue(record
, "Taxón");
887 if (taxonStrOrig
== null){
888 return isSynonym ? state
.getCurrentTaxon() : null;
891 boolean isAbsent
= false;
892 String taxonStr
= taxonStrOrig
;
893 if (taxonStrOrig
.startsWith("[") && taxonStrOrig
.endsWith("]")){
894 taxonStr
= taxonStr
.substring(1, taxonStr
.length() - 1);
898 boolean isAuct
= false;
899 if (taxonStr
.endsWith("auct.")){
901 taxonStr
.replace("auct.", "").trim();
903 state
.setTaxonIsAbsent(isAbsent
);
904 BotanicalName botanicalName
= makeName(state
, taxonStr
);
905 Reference
<?
> sec
= getSecReference(state
);
906 Taxon taxon
= Taxon
.NewInstance(botanicalName
, sec
);
908 taxon
.setAppendedPhrase("auct.");
911 TaxonNode higherNode
;
912 if (botanicalName
.isProtectedTitleCache()){
913 logger
.warn(state
.getCurrentLine() + ": Taxon could not be parsed: " + taxonStrOrig
);
914 higherNode
= familyNode
;
916 String genusStr
= botanicalName
.getGenusOrUninomial();
917 Taxon genus
= state
.getHigherTaxon(genusStr
);
919 higherNode
= genus
.getTaxonNodes().iterator().next();
921 BotanicalName name
= BotanicalName
.NewInstance(Rank
.GENUS());
922 name
.addSource(makeOriginalSource(state
));
923 name
.setGenusOrUninomial(genusStr
);
924 genus
= Taxon
.NewInstance(name
, sec
);
925 genus
.addSource(makeOriginalSource(state
));
926 higherNode
= familyNode
.addChildTaxon(genus
, null, null);
927 state
.putHigherTaxon(genusStr
, genus
);
931 botanicalName
.setTitleCache(taxonStrOrig
, true);
932 taxon
.setExcluded(true);
935 higherNode
.addChildTaxon(taxon
, null, null);
936 taxon
.addSource(makeOriginalSource(state
));
941 private final String orthVarRegExStr
= "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
942 private final Pattern orthVarRegEx
= Pattern
.compile(orthVarRegExStr
);
947 private BotanicalName
makeName(CubaImportState state
, String nameStrOrig
) {
949 String nameStr
= normalizeStatus(nameStrOrig
);
951 Matcher orthVarMatcher
= orthVarRegEx
.matcher(nameStr
);
952 String orthVar
= null;
953 if (orthVarMatcher
.matches()) {
954 orthVar
= orthVarMatcher
.group(1);
955 nameStr
= nameStr
.replace(" " + orthVar
, "").trim().replaceAll("\\s{2,}", " ");
956 orthVar
= orthVar
.substring(2, orthVar
.length() - 2);
959 boolean isNomInval
= false;
960 if (nameStr
.endsWith("nom. inval.")){
962 nameStr
= nameStr
.replace("nom. inval.", "").trim();
965 BotanicalName result
= (BotanicalName
)nameParser
.parseReferencedName(nameStr
, nc
, Rank
.SPECIES());
966 result
.addSource(makeOriginalSource(state
));
968 result
.addStatus(NomenclaturalStatus
.NewInstance(NomenclaturalStatusType
.INVALID()));
970 if (orthVar
!= null){
971 BotanicalName orthVarName
= (BotanicalName
)result
.clone();
972 orthVarName
.addSource(makeOriginalSource(state
));
974 Reference
<?
> citation
= null;
975 orthVarName
.addRelationshipToName(result
, NameRelationshipType
.ORTHOGRAPHIC_VARIANT(), citation
, null, null);
976 orthVarName
.setSpecificEpithet(orthVar
);
978 normalizeAuthors(result
);
986 private void normalizeAuthors(BotanicalName result
) {
987 result
.setCombinationAuthorship(normalizeAuthor(result
.getCombinationAuthorship()));
988 result
.setExCombinationAuthorship(normalizeAuthor(result
.getExCombinationAuthorship()));
989 result
.setExBasionymAuthorship(normalizeAuthor(result
.getExBasionymAuthorship()));
990 result
.setBasionymAuthorship(normalizeAuthor(result
.getBasionymAuthorship()));
996 * @param combinationAuthorship
999 private TeamOrPersonBase
<?
> normalizeAuthor(TeamOrPersonBase
<?
> author
) {
1000 if (author
== null){
1003 TeamOrPersonBase
<?
> result
;
1004 if (author
.isInstanceOf(Person
.class)){
1005 result
= normalizePerson(CdmBase
.deproxy(author
, Person
.class));
1007 Team team
= CdmBase
.deproxy(author
, Team
.class);
1008 List
<Person
> list
= team
.getTeamMembers();
1009 for(int i
= 0; i
< list
.size(); i
++){
1010 Person person
= list
.get(i
);
1011 Person tmpMember
= normalizePerson(person
);
1012 list
.set(i
, tmpMember
);
1024 private Person
normalizePerson(Person person
) {
1025 String title
= person
.getNomenclaturalTitle();
1026 title
= title
.replaceAll("(?<=[a-zA-Z])\\.(?=[a-zA-Z])", ". ");
1027 person
.setNomenclaturalTitle(title
);
1028 boolean isFilius
= title
.endsWith(" f.");
1030 title
.replace(" f.", "");
1033 String
[] splits
= title
.split("\\s+");
1034 int nNotFirstName
= isFilius ?
2 : 1;
1035 person
.setLastname(splits
[splits
.length
- nNotFirstName
] + (isFilius?
" f." : ""));
1036 person
.setFirstname(CdmUtils
.concat(" ", Arrays
.copyOfRange(splits
, 0, splits
.length
-nNotFirstName
)));
1045 private Reference
<?
> getSecReference(CubaImportState state
) {
1046 Reference
<?
> result
= state
.getSecReference();
1047 if (result
== null){
1048 result
= ReferenceFactory
.newDatabase();
1049 result
.setTitle("Flora of Cuba");
1050 state
.setSecReference(result
);
1056 private static final String
[] nomStatusStrings
= new String
[]{"nom. cons.", "ined.", "nom. illeg.",
1057 "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
1062 private String
normalizeStatus(String nameStr
) {
1063 if (nameStr
== null){
1066 String result
= nameStr
.replaceAll(HOMONYM_MARKER
, "").trim();
1067 for (String nomStatusStr
: nomStatusStrings
){
1068 nomStatusStr
= " " + nomStatusStr
;
1069 if (result
.endsWith(nomStatusStr
)){
1070 result
= result
.replace(nomStatusStr
, "," + nomStatusStr
);
1073 result
= result
.replaceAll(DOUBTFUL_MARKER
, "").trim();
1074 result
= result
.replace("[taxon]", "[infraspec.]");
1086 private TaxonNode
getFamilyTaxon(HashMap
<String
, String
> record
, CubaImportState state
) {
1087 String familyStr
= getValue(record
, "Fam. default");
1088 if (familyStr
== null){
1091 familyStr
= familyStr
.trim();
1092 String alternativeFamilyStr
= null;
1093 if (familyStr
.contains("/")){
1094 String
[] splits
= familyStr
.split("/");
1095 if (splits
.length
> 2){
1096 logger
.warn(state
.getCurrentLine() +": " + "More than 1 alternative name:" + familyStr
);
1098 familyStr
= splits
[0].trim();
1099 alternativeFamilyStr
= splits
[1].trim();
1102 Taxon family
= state
.getHigherTaxon(familyStr
);
1103 TaxonNode familyNode
;
1104 if (family
!= null){
1105 familyNode
= family
.getTaxonNodes().iterator().next();
1107 BotanicalName name
= makeFamilyName(state
, familyStr
);
1108 Reference
<?
> sec
= getSecReference(state
);
1109 family
= Taxon
.NewInstance(name
, sec
);
1110 ITaxonTreeNode rootNode
= getClassification(state
);
1111 familyNode
= rootNode
.addChildTaxon(family
, sec
, null);
1112 state
.putHigherTaxon(familyStr
, family
);
1116 if (isNotBlank(alternativeFamilyStr
)){
1117 NameRelationshipType type
= NameRelationshipType
.ALTERNATIVE_NAME();
1118 BotanicalName alternativeName
= makeFamilyName(state
, alternativeFamilyStr
);
1119 BotanicalName familyName
= (BotanicalName
)family
.getName();
1120 boolean hasRelation
= false;
1121 for (NameRelationship nameRel
: familyName
.getRelationsToThisName()){
1122 if (nameRel
.getType().equals(type
)){
1123 if (nameRel
.getFromName().equals(alternativeName
)){
1129 familyName
.addRelationshipFromName(alternativeName
, type
, null);
/**
 * Consistency check for taxa which are flagged as absent in the import state.
 * <p>
 * The elements of the first description of the taxon are inspected. An error is
 * logged for a distribution in a Cuban area (see isCubanArea) whose status is set,
 * is not an absence term and is not a doubtful term (see isDoubtfulTerm).
 * <p>
 * NOTE(review): the statements following the flag check and the error log are not
 * visible in this excerpt - presumably early returns; confirm against the full file.
 *
 * @param state the import state providing the absent flag and the current line
 * @param taxon the taxon to validate
 */
1142 private void validateTaxonIsAbsent(CubaImportState state
, Taxon taxon
) {
//only taxa flagged as absent are of interest here
1143 if (!state
.isTaxonIsAbsent()){
//only the first description of the taxon is looked at
1147 for (DescriptionElementBase el
: taxon
.getDescriptions().iterator().next().getElements()){
1148 if (el
instanceof Distribution
){
1149 Distribution dist
= (Distribution
)el
;
1150 NamedArea area
= dist
.getArea();
1151 if (isCubanArea(area
)){
1152 PresenceAbsenceTerm status
= dist
.getStatus();
1153 if (status
!= null && !status
.isAbsenceTerm()){
//doubtful presence is tolerated for absent taxa
1154 if (!isDoubtfulTerm(status
)){
1155 String name
= taxon
.getName().getTitleCache();
1156 logger
.error(state
.getCurrentLine() +": Taxon ("+name
+")is absent'[]' but has presence distribution: " + status
.getTitleCache());
/**
 * Consistency check between the endemic flag of the import state and the
 * distributions of the taxon outside of Cuba.
 * <p>
 * The elements of the first description of the taxon are inspected. A distribution
 * in a non-Cuban area (see isCubanArea) whose status is set, is not an absence term
 * and is not a doubtful term (see isDoubtfulTerm) counts as external presence.
 * An error is logged if the state is endemic and an external presence is found,
 * and also if the state is not endemic and no external presence was found.
 * <p>
 * NOTE(review): the statement following the first error log is not visible in this
 * excerpt - presumably an early return; confirm against the full file.
 *
 * @param state the import state providing the endemic flag and the current line
 * @param taxon the taxon to validate
 */
1169 private void validateEndemic(CubaImportState state
, Taxon taxon
) {
1171 boolean hasExternalPresence
= false;
//only the first description of the taxon is looked at
1172 for (DescriptionElementBase el
: taxon
.getDescriptions().iterator().next().getElements()){
1173 if (el
instanceof Distribution
){
1174 Distribution dist
= (Distribution
)el
;
1175 NamedArea area
= dist
.getArea();
1176 if (!isCubanArea(area
)){
1177 PresenceAbsenceTerm status
= dist
.getStatus();
1178 if (status
!= null && !status
.isAbsenceTerm()){
1179 if (!isDoubtfulTerm(status
)){
1180 hasExternalPresence
= true;
//an endemic taxon must not be present outside of Cuba
1181 if (state
.isEndemic()){
1182 String name
= taxon
.getName().getTitleCache();
1183 logger
.error(state
.getCurrentLine() +": Taxon ("+name
+")is endemic but has non-cuban distribution: " + area
.getIdInVocabulary() + "-" + status
.getIdInVocabulary());
//a non-endemic taxon is expected to be present somewhere outside of Cuba
1191 if (!state
.isEndemic() && ! hasExternalPresence
){
1192 String name
= taxon
.getName().getTitleCache();
1193 logger
.error(state
.getCurrentLine() +": Taxon ("+name
+")is not endemic but has no non-cuban distribution" );
1205 private Taxon
makeAlternativeFamilyTaxon(CubaImportState state
, String famStr
, Reference
<?
> famRef
) {
1206 String key
= famRef
.getTitle() + ":"+ famStr
;
1207 Taxon family
= state
.getHigherTaxon(key
);
1208 if (family
== null){
1209 BotanicalName name
= makeFamilyName(state
, famStr
);
1210 family
= Taxon
.NewInstance(name
, famRef
);
1211 state
.putHigherTaxon(key
, family
);
1223 private BotanicalName
makeFamilyName(CubaImportState state
, String famStr
) {
1224 BotanicalName name
= state
.getFamilyName(famStr
);
1226 name
= BotanicalName
.NewInstance(Rank
.FAMILY());
1227 name
.setGenusOrUninomial(famStr
);
1228 state
.putFamilyName(famStr
, name
);
1229 name
.addSource(makeOriginalSource(state
));
1239 private TaxonNode
getClassification(CubaImportState state
) {
1240 Classification classification
= state
.getClassification();
1241 if (classification
== null){
1242 classification
= getClassificationService().find(state
.getConfig().getClassificationUuid());
1244 TaxonNode rootNode
= state
.getRootNode();
1245 if (rootNode
== null){
1246 rootNode
= getTaxonNodeService().find(plantaeUuid
);
1248 if (rootNode
== null){
1249 Reference
<?
> sec
= getSecReference(state
);
1250 if (classification
== null){
1251 String classificationName
= state
.getConfig().getClassificationName();
1253 Language language
= Language
.DEFAULT();
1254 classification
= Classification
.NewInstance(classificationName
, sec
, language
);
1255 state
.setClassification(classification
);
1256 classification
.setUuid(state
.getConfig().getClassificationUuid());
1257 classification
.getRootNode().setUuid(rootUuid
);
1260 BotanicalName plantaeName
= BotanicalName
.NewInstance(Rank
.KINGDOM());
1261 plantaeName
.setGenusOrUninomial("Plantae");
1262 Taxon plantae
= Taxon
.NewInstance(plantaeName
, sec
);
1263 TaxonNode plantaeNode
= classification
.addChildTaxon(plantae
, null, null);
1264 plantaeNode
.setUuid(plantaeUuid
);
1265 state
.setRootNode(plantaeNode
);
1266 getClassificationService().save(classification
);
1268 rootNode
= plantaeNode
;
1276 * @param originalKey
1279 private String
getValue(HashMap
<String
, String
> record
, String originalKey
) {
1280 String value
= record
.get(originalKey
);
1281 if (! StringUtils
.isBlank(value
)) {
1282 if (logger
.isDebugEnabled()) { logger
.debug(originalKey
+ ": " + value
); }
1283 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
1293 * Stores taxa records in DB
1296 protected void firstPass(CubaImportState state
) {
1297 boolean isSynonymOnly
= false;
1299 String line
= state
.getCurrentLine() + ": ";
1300 HashMap
<String
, String
> record
= state
.getOriginalRecord();
1302 Set
<String
> keys
= record
.keySet();
1303 for (String key
: keys
) {
1304 if (! expectedKeys
.contains(key
)){
1305 logger
.warn(line
+ "Unexpected Key: " + key
);
1309 if (record
.get("Fam. default") == null && keys
.size() == 2 && record
.get("Syn.") == null && record
.get("Nat") != null && record
.get("Adv") != null){
1310 //second header line, don't handle
1315 TaxonNode familyTaxon
= getFamilyTaxon(record
, state
);
1316 if (familyTaxon
== null){
1317 if (record
.get("Taxón") != null){
1318 logger
.warn(line
+ "Family not recognized but taxon exists: " + record
.get("Taxón"));
1320 }else if (record
.get("Syn.") == null){
1321 logger
.warn(line
+ "Family not recognized but also no synonym exists");
1324 isSynonymOnly
= true;
1329 Taxon taxon
= makeTaxon(record
, state
, familyTaxon
, isSynonymOnly
);
1330 if (taxon
== null && ! isSynonymOnly
){
1331 logger
.warn(line
+ "taxon could not be created and is null");
1334 state
.setCurrentTaxon(taxon
);
1337 if (!isSynonymOnly
){
1338 makeAlternativeFamilies(record
, state
, familyTaxon
, taxon
);
1342 makeNotes(record
, state
);
1345 makeSynonyms(record
, state
, !isSynonymOnly
);
1347 //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
1348 makeCubanDistribution(record
, state
);
1351 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1352 // "CuC","VC","Ci","SS","CA","Cam","LT",
1353 // "CuE","Gr","Ho","SC","Gu",
1354 makeProvincesDistribution(record
, state
);
1356 // "Esp","Ja","PR","Men","Bah","Cay",
1357 // "AmN","AmC","AmS","VM"});
1358 makeOtherAreasDistribution(record
, state
);
1360 validateTaxonIsAbsent(state
, taxon
);
1361 if (!isSynonymOnly
){
1362 validateEndemic(state
, taxon
);
1365 state
.setHighestStatusForTaxon(null);
1375 private IdentifiableSource
makeOriginalSource(CubaImportState state
) {
1376 return IdentifiableSource
.NewDataImportInstance("line: " + state
.getCurrentLine(), null, state
.getConfig().getSourceReference());
1382 private DescriptionElementSource
makeDescriptionSource(CubaImportState state
) {
1383 return DescriptionElementSource
.NewDataImportInstance("line: " + state
.getCurrentLine(), null, state
.getConfig().getSourceReference());
1386 private static Set
<UUID
> doubtfulStatus
= new HashSet
<>();
1392 private boolean isDoubtfulTerm(PresenceAbsenceTerm status
) {
1393 if (doubtfulStatus
.isEmpty()){
1394 doubtfulStatus
.add(CubaTransformer
.nonNativeDoubtfullyNaturalisedUuid
);
1395 doubtfulStatus
.add(CubaTransformer
.doubtfulIndigenousDoubtfulUuid
);
1396 doubtfulStatus
.add(CubaTransformer
.endemicDoubtfullyPresentUuid
);
1397 doubtfulStatus
.add(CubaTransformer
.naturalisedDoubtfullyPresentUuid
);
1398 doubtfulStatus
.add(CubaTransformer
.nonNativeDoubtfullyPresentUuid
);
1399 doubtfulStatus
.add(CubaTransformer
.occasionallyCultivatedUuid
);
1400 doubtfulStatus
.add(CubaTransformer
.rareCasualUuid
);
1401 doubtfulStatus
.add(PresenceAbsenceTerm
.NATIVE_PRESENCE_QUESTIONABLE().getUuid());
1402 doubtfulStatus
.add(PresenceAbsenceTerm
.CULTIVATED_PRESENCE_QUESTIONABLE().getUuid());
1404 boolean isDoubtful
= doubtfulStatus
.contains(status
.getUuid());
1413 private boolean isCubanArea(NamedArea area
) {
1414 if (area
.getUuid().equals(CubaTransformer
.uuidCuba
)){
1416 }else if (area
.getPartOf()!= null){
1417 return isCubanArea(area
.getPartOf());
1427 * @param familyTaxon
1430 private void makeAlternativeFamilies(HashMap
<String
, String
> record
,
1431 CubaImportState state
,
1432 TaxonNode familyTaxon
,
1435 String famFRC
= record
.get("Fam. FRC");
1436 String famAS
= record
.get("Fam. A&S");
1437 String famFC
= record
.get("Fam. FC");
1439 Reference
<?
> refFRC
= makeReference(state
, CubaTransformer
.uuidRefFRC
);
1440 Reference
<?
> refAS
= makeReference(state
, CubaTransformer
.uuidRefAS
);
1441 Reference
<?
> refFC
= makeReference(state
, CubaTransformer
.uuidRefFC
);
1443 makeSingleAlternativeFamily(state
, taxon
, famFRC
, refFRC
);
1444 makeSingleAlternativeFamily(state
, taxon
, famAS
, refAS
);
1445 makeSingleAlternativeFamily(state
, taxon
, famFC
, refFC
);
1454 private Reference
<?
> makeReference(CubaImportState state
, UUID uuidRef
) {
1455 Reference
<?
> ref
= state
.getReference(uuidRef
);
1457 ref
= getReferenceService().find(uuidRef
);
1458 state
.putReference(uuidRef
, ref
);
/**
 * Stores the family used for the taxon in one other flora in three ways: as text
 * data and as taxon interaction in the taxon description, and as "includes"
 * concept relation from the family taxon of the flora to the taxon.
 * <p>
 * NOTE(review): the statements executed for a blank family string are not visible
 * in this excerpt; confirm against the full file how a blank value is handled.
 *
 * @param state the import state
 * @param taxon the taxon the alternative family is added to
 * @param famStr the family name used in the other flora
 * @param famRef the reference of the other flora
 */
1470 private void makeSingleAlternativeFamily(CubaImportState state
, Taxon taxon
, String famStr
, Reference
<?
> famRef
) {
1471 if (isBlank(famStr
)){
1476 TaxonDescription desc
= getTaxonDescription(taxon
, false, true);
//the feature uuids are defined by the transformer
1481 altFamUuid1
= state
.getTransformer().getFeatureUuid("Alt.Fam.");
1482 altFamUuid2
= state
.getTransformer().getFeatureUuid("Alt.Fam.2");
1483 } catch (UndefinedTransformerMethodException e
) {
1484 throw new RuntimeException(e
);
1488 Taxon famTaxon
= makeAlternativeFamilyTaxon(state
, famStr
, famRef
);
//text data without text, the family name is attached via the source
1492 Feature feature1
= getFeature(state
, altFamUuid1
, "Families in other Floras (Text)", "Families in other Floras (Text)", "Other floras", null);
1493 feature1
.addRepresentation(Representation
.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language
.SPANISH_CASTILIAN()));
1494 // TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
1495 TextData textData
= TextData
.NewInstance(feature1
, null, Language
.DEFAULT(), null);
1496 textData
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null,null, famRef
, null, famTaxon
.getName(),null);
1497 desc
.addElement(textData
);
//taxon interaction pointing to the family taxon of the other flora
1502 Feature feature2
= getFeature(state
, altFamUuid2
, "Families in other Floras", "Families in other Floras", "Other floras(2)", null);
1503 feature2
.setSupportsTaxonInteraction(true);
1504 feature2
.addRepresentation(Representation
.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language
.SPANISH_CASTILIAN()));
1505 TaxonInteraction taxInteract
= TaxonInteraction
.NewInstance(feature2
);
//NOTE(review): the text is put on textData, not on taxInteract - confirm this is intended
1506 textData
.putText(Language
.SPANISH_CASTILIAN(), "Familias en otras Floras");
1507 taxInteract
.setTaxon2(famTaxon
);
1508 taxInteract
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null,null, famRef
, null);
1509 desc
.addElement(taxInteract
);
//concept relation: the family of the other flora includes the taxon
1512 famTaxon
.addTaxonRelation(taxon
, TaxonRelationshipType
.INCLUDES(), taxon
.getSec(), null);
1525 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1526 // "CuC","VC","Ci","SS","CA","Cam","LT",
1527 // "CuE","Gr","Ho","SC","Gu",
1528 private void makeProvincesDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
1529 List
<String
> areaKeys
= Arrays
.asList(new String
[]{
1530 "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1531 "CuC","VC","Ci","SS","CA","Cam","LT",
1532 "CuE","Gr","Ho","SC","Gu",
1534 for (String areaKey
: areaKeys
){
1535 state
.setCubanProvince(true);
1536 makeSingleProvinceDistribution(areaKey
, record
, state
);
1540 private void makeOtherAreasDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
1541 List
<String
> areaKeys
= Arrays
.asList(new String
[]{
1542 "Esp","Ja","PR","Men","Bah","Cay",
1543 "AmN","AmC","AmS","VM"});
1544 for (String areaKey
: areaKeys
){
1545 state
.setCubanProvince(false);
1546 makeSingleProvinceDistribution(areaKey
, record
, state
);
1557 * @param highestStatus
1559 * @throws UndefinedTransformerMethodException
1561 private PresenceAbsenceTerm
makeProvinceStatus(String areaKey
,
1562 HashMap
<String
, String
> record
,
1563 CubaImportState state
) throws UndefinedTransformerMethodException
{
1565 String statusStr
= record
.get(areaKey
);
1566 if (statusStr
== null){
1569 statusStr
= statusStr
.trim();
1571 PresenceAbsenceTerm status
= state
.getTransformer().getPresenceTermByKey(statusStr
);
1572 if (status
== null){
1573 // PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
1574 if (state
.isCubanProvince() && isMinus(statusStr
)){
1575 // getAbsenceTermForStatus(state, highestStatus);
1576 //we now handle cuban provinces same as external regions
1577 status
= state
.getTransformer().getPresenceTermByKey("--");
1578 }else if (! state
.isCubanProvince() && isMinus(statusStr
)){
1579 status
= state
.getTransformer().getPresenceTermByKey("--");
1581 // logger.warn("Unhandled status str for provinces / external regions: " + statusStr);
1582 UUID statusUuid
= state
.getTransformer().getPresenceTermUuid(statusStr
);
1583 if (statusUuid
== null){
1584 logger
.error(state
.getCurrentLine() + ": Undefined status str for provinces / external regions. No UUID given: '" + statusStr
+ "'");
1586 status
= getPresenceTerm(state
, statusUuid
, statusStr
, statusStr
, statusStr
, false);
1596 * @param highestStatus
1597 * @throws UndefinedTransformerMethodException
1599 private PresenceAbsenceTerm
getAbsenceTermForStatus(CubaImportState state
, PresenceAbsenceTerm highestStatus
) throws UndefinedTransformerMethodException
{
1600 if (highestStatus
== null){
1601 logger
.warn(state
.getCurrentLine() + ": Highest status not defined");
1604 PresenceAbsenceTerm result
= null;
1605 if (highestStatus
.equals(getStatus(state
, "E"))){
1606 result
= getStatus(state
, "-E");
1607 }else if (highestStatus
.getUuid().equals(state
.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus
.equals(PresenceAbsenceTerm
.NATIVE())){
1608 result
= getStatus(state
, "-Ind.");
1609 }else if (highestStatus
.equals(getStatus(state
, "Ind.?"))){
1610 result
= getStatus(state
, "-Ind.?"); //TODO
1611 }else if (highestStatus
.equals(getStatus(state
, "N"))){
1612 result
= getStatus(state
, "-N");
1613 }else if (highestStatus
.equals(getStatus(state
, "P"))){
1614 result
= getStatus(state
, "-P");
1615 }else if (highestStatus
.equals(getStatus(state
, "A"))){
1616 result
= getStatus(state
, "-A");
1617 }else if (highestStatus
.equals(getStatus(state
, "C"))){
1618 result
= getStatus(state
, "-C");
1620 logger
.warn(state
.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus
.getTitleCache());
1628 * @throws UndefinedTransformerMethodException
1630 private PresenceAbsenceTerm
getStatus(CubaImportState state
, String key
) throws UndefinedTransformerMethodException
{
1631 PresenceAbsenceTerm status
= state
.getTransformer().getPresenceTermByKey(key
);
1632 if (status
== null){
1633 UUID statusUuid
= state
.getTransformer().getPresenceTermUuid(key
);
1634 status
= getPresenceTerm(state
, statusUuid
, null, null, null, false);
1641 * Stores parent-child, synonym and common name relationships
1644 protected void secondPass(CubaImportState state
) {
1645 // CyprusRow cyprusRow = state.getCyprusRow();
1651 protected boolean isIgnore(CubaImportState state
) {
1652 return ! state
.getConfig().isDoTaxa();
1656 protected boolean doCheck(CubaImportState state
) {
1657 logger
.warn("DoCheck not yet implemented for CubaExcelImport");