2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.cuba
;
12 import java
.util
.ArrayList
;
13 import java
.util
.Arrays
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
16 import java
.util
.List
;
18 import java
.util
.UUID
;
19 import java
.util
.regex
.Matcher
;
20 import java
.util
.regex
.Pattern
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.springframework
.stereotype
.Component
;
26 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
27 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
28 import eu
.etaxonomy
.cdm
.io
.excel
.common
.ExcelImporterBase
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
30 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
31 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
32 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
33 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
34 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
35 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
36 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
37 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
38 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
39 import eu
.etaxonomy
.cdm
.model
.description
.TaxonInteraction
;
40 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
41 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
42 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
43 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationshipType
;
44 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
45 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
46 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
47 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
48 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
49 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
51 import eu
.etaxonomy
.cdm
.model
.taxon
.ITaxonTreeNode
;
52 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationship
;
53 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
54 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
55 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
56 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationshipType
;
57 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
58 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
66 public class CubaExcelImport
extends ExcelImporterBase
<CubaImportState
> {
67 private static final long serialVersionUID
= -747486709409732371L;
68 private static final Logger logger
= Logger
.getLogger(CubaExcelImport
.class);
70 private static final String HOMONYM_MARKER
= "\\s+homon.?$";
71 private static final String DOUBTFUL_MARKER
= "^\\?\\s?";
74 private static UUID rootUuid
= UUID
.fromString("206d42e4-ac32-4f20-a093-14826014e667");
75 private static UUID plantaeUuid
= UUID
.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
77 private static INonViralNameParser
<?
> nameParser
= NonViralNameParserImpl
.NewInstance();
78 private static NomenclaturalCode nc
= NomenclaturalCode
.ICNAFP
;
80 private static List
<String
> expectedKeys
= Arrays
.asList(new String
[]{
81 "Fam. default","Fam. FRC","Fam. A&S","Fam. FC",
82 "Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
85 protected void analyzeRecord(HashMap
<String
, String
> record
, CubaImportState state
) {
86 //we do everything in firstPass here
96 private void makeCubanDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
98 NamedArea cuba
= getNamedArea(state
, state
.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
99 TaxonDescription desc
= getTaxonDescription(state
.getCurrentTaxon(), false, true);
100 List
<PresenceAbsenceTerm
> statuss
= makeCubanStatuss(record
, state
);
101 for (PresenceAbsenceTerm status
: statuss
){
102 Distribution distribution
= Distribution
.NewInstance(cuba
, status
);
103 desc
.addElement(distribution
);
105 } catch (UndefinedTransformerMethodException e
) {
115 * @throws UndefinedTransformerMethodException
117 private List
<PresenceAbsenceTerm
> makeCubanStatuss(HashMap
<String
, String
> record
, CubaImportState state
) throws UndefinedTransformerMethodException
{
118 boolean isAbsent
= false; //TODO
119 PresenceAbsenceTerm highestStatus
= null;
121 String line
= state
.getCurrentLine() + ": ";
122 List
<PresenceAbsenceTerm
> result
= new ArrayList
<>();
124 String endemicStr
= getValue(record
, "End");
125 String indigenousStr
= getValue(record
, "Ind");
126 String indigenousDoubtStr
= getValue(record
, "Ind? D");
127 String naturalisedStr
= getValue(record
, "Nat");
128 String dudStr
= getValue(record
, "Dud P");
129 String advStr
= getValue(record
, "Adv");
130 String cultStr
= getValue(record
, "Cult C");
132 if (endemicStr
!= null){
133 if(endemicStr
.equals("+")){
134 PresenceAbsenceTerm endemicState
= state
.getTransformer().getPresenceTermByKey("E");
135 result
.add(endemicState
);
136 highestStatus
= endemicState
;
137 }else if(isMinus(endemicStr
)){
138 UUID endemicUuid
= state
.getTransformer().getPresenceTermUuid("-E");
139 PresenceAbsenceTerm endemicState
= getPresenceTerm(state
, endemicUuid
, null, null, null, false);
140 result
.add(endemicState
);
141 checkAbsentHighestState(highestStatus
, line
, "endemic", false);
142 }else if(endemicStr
.equals("?")){
143 UUID endemicDoubtfulUuid
= state
.getTransformer().getPresenceTermUuid("?E");
144 PresenceAbsenceTerm endemicState
= getPresenceTerm(state
, endemicDoubtfulUuid
, null, null, null, false);
145 result
.add(endemicState
);
146 checkAbsentHighestState(highestStatus
, line
, "endemic", false);
148 logger
.warn(line
+ "Endemic not recognized: " + endemicStr
);
151 if (indigenousStr
!= null){
152 if(indigenousStr
.equals("+")){
153 UUID indigenousUuid
= state
.getTransformer().getPresenceTermUuid("Ind.");
154 PresenceAbsenceTerm indigenousState
= getPresenceTerm(state
, indigenousUuid
, null, null, null, false);
155 result
.add(indigenousState
);
156 highestStatus
= highestStatus
!= null ? highestStatus
: indigenousState
;
157 }else if(isMinus(indigenousStr
)){
158 PresenceAbsenceTerm indigenousState
= state
.getTransformer().getPresenceTermByKey("-Ind.");
159 result
.add(indigenousState
);
160 checkAbsentHighestState(highestStatus
, line
, "indigenous", false);
161 }else if(indigenousStr
.equals("?")){
162 UUID indigenousDoubtUuid
= state
.getTransformer().getPresenceTermUuid("?Ind.");
163 PresenceAbsenceTerm indigenousDoubtState
= getPresenceTerm(state
, indigenousDoubtUuid
, null, null, null, false);
164 result
.add(indigenousDoubtState
);
165 checkAbsentHighestState(highestStatus
, line
, "indigenous", true);
167 logger
.warn(line
+ "Indigenous not recognized: " + indigenousStr
);
170 if(indigenousDoubtStr
!= null){
171 if(indigenousDoubtStr
.equals("D")){
172 UUID indigenousDoubtUuid
= state
.getTransformer().getPresenceTermUuid("Ind.?");
173 PresenceAbsenceTerm indigenousDoubtState
= getPresenceTerm(state
, indigenousDoubtUuid
, null, null, null, false);
174 result
.add(indigenousDoubtState
);
175 highestStatus
= highestStatus
!= null ? highestStatus
: indigenousDoubtState
;
177 logger
.warn(line
+ "Indigenous doubtful not recognized: " + indigenousDoubtStr
);
180 if(naturalisedStr
!= null){
181 if(naturalisedStr
.equals("N")){
182 PresenceAbsenceTerm haturalizedState
= state
.getTransformer().getPresenceTermByKey("Nat.");
183 result
.add(haturalizedState
);
184 highestStatus
= highestStatus
!= null ? highestStatus
: haturalizedState
;
185 }else if(isMinus(naturalisedStr
)){
186 UUID naturalisedErrorUuid
= state
.getTransformer().getPresenceTermUuid("-Nat.");
187 PresenceAbsenceTerm naturalisedErrorState
= getPresenceTerm(state
, naturalisedErrorUuid
, null, null, null, false);
188 result
.add(naturalisedErrorState
);
189 checkAbsentHighestState(highestStatus
, line
, "naturalized", false);
190 }else if(naturalisedStr
.equals("?")){
191 UUID naturalisedDoubtUuid
= state
.getTransformer().getPresenceTermUuid("?Nat.");
192 PresenceAbsenceTerm naturalisedDoubtState
= getPresenceTerm(state
, naturalisedDoubtUuid
, null, null, null, false);
193 result
.add(naturalisedDoubtState
);
194 checkAbsentHighestState(highestStatus
, line
, "naturalized", true);
196 logger
.warn(line
+ "Naturalized not recognized: " + naturalisedStr
);
200 if(dudStr
.equals("P")){
201 UUID dudUuid
= state
.getTransformer().getPresenceTermUuid("Dud.");
202 PresenceAbsenceTerm dudState
= getPresenceTerm(state
, dudUuid
, null, null, null, false);
203 result
.add(dudState
);
204 highestStatus
= highestStatus
!= null ? highestStatus
: dudState
;
205 }else if(isMinus(dudStr
)){
206 UUID nonNativeErrorUuid
= state
.getTransformer().getPresenceTermUuid("-Dud.");
207 PresenceAbsenceTerm nonNativeErrorState
= getPresenceTerm(state
, nonNativeErrorUuid
, null, null, null, false);
208 result
.add(nonNativeErrorState
);
209 checkAbsentHighestState(highestStatus
, line
, "non-native and doubtfully naturalised", false);
210 }else if(dudStr
.equals("?")){
211 UUID naturalisedDoubtUuid
= state
.getTransformer().getPresenceTermUuid("?Dud.");
212 PresenceAbsenceTerm naturalisedDoubtState
= getPresenceTerm(state
, naturalisedDoubtUuid
, null, null, null, false);
213 result
.add(naturalisedDoubtState
);
214 checkAbsentHighestState(highestStatus
, line
, "non-native and doubtfully naturalised", true);
216 logger
.warn(line
+ "non-native and doubtfully naturalised not recognized: " + dudStr
);
220 if(advStr
.equals("A")){
221 UUID advUuid
= state
.getTransformer().getPresenceTermUuid("Adv.");
222 PresenceAbsenceTerm advState
= getPresenceTerm(state
, advUuid
, null, null, null, false);
223 result
.add(advState
);
224 highestStatus
= highestStatus
!= null ? highestStatus
: advState
;
225 }else if(isMinus(advStr
)){
226 UUID advUuid
= state
.getTransformer().getPresenceTermUuid("-Adv.");
227 PresenceAbsenceTerm advState
= getPresenceTerm(state
, advUuid
, null, null, null, false);
228 result
.add(advState
);
229 checkAbsentHighestState(highestStatus
, line
, "adventive", false);
231 logger
.warn(line
+ "'adventive (casual) alien' not recognized: " + advStr
);
233 }else if(cultStr
!= null){
234 if (! (cultStr
.matches("(C|\\(C\\)|\\?|–)"))){
235 logger
.warn("'cultivated' not recognized: " + cultStr
);
236 }else if(cultStr
.equals("C")){
237 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("Cult.");
238 result
.add(cultivatedState
);
239 highestStatus
= highestStatus
!= null ? highestStatus
: cultivatedState
;
240 }else if(cultStr
.equals("?")){
241 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("?Cult.");
242 result
.add(cultivatedState
);
243 checkAbsentHighestState(highestStatus
, line
, "cultivated", true);
244 }else if(cultStr
.equals("(C)")){
245 UUID ocassualCultUuid
= state
.getTransformer().getPresenceTermUuid("(C)");
246 PresenceAbsenceTerm cultivatedState
= getPresenceTerm(state
, ocassualCultUuid
, null, null, null, false);
247 result
.add(cultivatedState
);
248 }else if(isMinus(cultStr
)){
249 PresenceAbsenceTerm cultivatedState
= state
.getTransformer().getPresenceTermByKey("-Cult.");
250 result
.add(cultivatedState
);
251 checkAbsentHighestState(highestStatus
, line
, "cultivated", false);
253 logger
.warn(line
+ "'cultivated' not recognized: " + cultStr
);
256 state
.setHighestStatusForTaxon(highestStatus
);
262 * @param highestStatus
265 private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus
, String line
, String stateLabel
, boolean doubtful
) {
266 if (highestStatus
== null){
267 String absentStr
= doubtful ?
"doubtful" : "absent";
268 logger
.warn(line
+ "Highest cuban state is " + absentStr
+ " " + stateLabel
);
275 * @param indigenousStr
278 private boolean isMinus(String str
) {
279 return str
.equals("-") || str
.equals("–");
284 * @param indigenousStr
287 private boolean checkPlusMinusDoubt(String str
) {
288 return str
.equals("+") || isMinus(str
)|| str
.equals("?");
293 * @param indigenousStr
294 * @param indigenousDoubtStr
295 * @param naturalisedStr
300 private boolean checkAllNull(String
... others
) {
301 for (String other
: others
){
310 private static final String acceptedRegExStr
= "\\(([^\\[\\]“”]{6,})\\)";
311 // String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
312 // + "(\\((.{6,})\\))?";
313 private static final String heterotypicRegExStr
= "([^\\(\\[\\]“”]{5,})"
315 private static final String heterotypicRegExStr_TEST
= "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
317 private static final String auctRegExStr
= "auct\\."
318 +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.|\\sWright"
319 + "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
320 + "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
323 private static final String missapliedRegExStr
= "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr
+ "|sensu\\s+.{2,})";
324 private static final String sphalmRegExStr
= "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
325 private static final String nomInvalRegExStr
= "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
326 private static final String homonymRegExStr
= "\\s*(\\[.*\\])*\\s*";
328 private static final Pattern acceptedRegEx
= Pattern
.compile(acceptedRegExStr
+ homonymRegExStr
);
329 private static final Pattern heterotypicRegEx
= Pattern
.compile(heterotypicRegExStr
+ homonymRegExStr
);
330 private static final Pattern missapliedRegEx
= Pattern
.compile(missapliedRegExStr
);
331 private static final Pattern nomInvalRegEx
= Pattern
.compile(nomInvalRegExStr
);
332 private static final Pattern sphalmRegEx
= Pattern
.compile(sphalmRegExStr
);
339 private void makeSynonyms(HashMap
<String
, String
> record
, CubaImportState state
) {
340 // boolean forAccepted = true;
341 String synonymStr
= record
.get("Syn.");
342 String line
= state
.getCurrentLine() + ": ";
344 if (synonymStr
== null){
345 //TODO test that this is not a synonym only line
348 synonymStr
= synonymStr
.trim();
350 // String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
351 // String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
353 // Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
355 Matcher missapliedMatcher
= missapliedRegEx
.matcher(synonymStr
);
356 Matcher nomInvalMatcher
= nomInvalRegEx
.matcher(synonymStr
);
357 Matcher acceptedMatcher
= acceptedRegEx
.matcher(synonymStr
);
358 Matcher heterotypicMatcher
= heterotypicRegEx
.matcher(synonymStr
);
359 Matcher sphalmMatcher
= sphalmRegEx
.matcher(synonymStr
);
361 List
<BotanicalName
> homonyms
= new ArrayList
<>();
362 if (missapliedMatcher
.matches()){
363 boolean doubtful
= missapliedMatcher
.group(1) != null;
364 String firstPart
= missapliedMatcher
.group(2);
365 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
367 String secondPart
= missapliedMatcher
.group(3);
368 Taxon misappliedNameTaxon
= Taxon
.NewInstance(name
, null);
369 misappliedNameTaxon
.setDoubtful(doubtful
);
370 if (secondPart
.startsWith("sensu")){
371 secondPart
= secondPart
.substring(5).trim();
372 if (secondPart
.contains(" ")){
373 logger
.warn(line
+ "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart
);
375 Reference
<?
> sensu
= ReferenceFactory
.newGeneric();
376 Team team
= Team
.NewTitledInstance(secondPart
, null);
377 sensu
.setAuthorship(team
);
378 misappliedNameTaxon
.setSec(sensu
);
379 }else if (secondPart
.matches(auctRegExStr
)){
380 secondPart
= secondPart
.replace("p. p.", "p.p.");
381 misappliedNameTaxon
.setAppendedPhrase(secondPart
);
383 logger
.warn(line
+ "Misapplied second part not recognized: " + secondPart
);
386 Reference
<?
> relRef
= null;
387 state
.getCurrentTaxon().addMisappliedName(misappliedNameTaxon
, relRef
, null);
388 }else if (nomInvalMatcher
.matches()){
389 String firstPart
= nomInvalMatcher
.group(1);
390 String afterInval
= nomInvalMatcher
.group(2);
391 if (StringUtils
.isNotBlank(afterInval
)){
392 logger
.warn(state
.getCurrentLine() + ": After inval to be implemented: " + afterInval
);
394 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
395 NomenclaturalStatus status
= NomenclaturalStatus
.NewInstance( NomenclaturalStatusType
.INVALID());
396 name
.addStatus(status
);
397 state
.getCurrentTaxon().addSynonymName(name
, SynonymRelationshipType
.SYNONYM_OF());
398 }else if (sphalmMatcher
.matches()){
399 String firstPart
= sphalmMatcher
.group(1);
400 String sphalmPart
= synonymStr
.replace(firstPart
, "").replace("“","").replace("”","").trim();
401 BotanicalName name
= (BotanicalName
)nameParser
.parseSimpleName(firstPart
, state
.getConfig().getNomenclaturalCode(), Rank
.SPECIES());
402 // NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
403 // name.addStatus(status);
404 SynonymRelationship sr
= state
.getCurrentTaxon().addSynonymName(name
, SynonymRelationshipType
.SYNONYM_OF());
405 sr
.getSynonym().setAppendedPhrase(sphalmPart
);
406 sr
.getSynonym().setSec(null);
407 }else if (acceptedMatcher
.matches()){
408 String firstPart
= acceptedMatcher
.group(1);
409 String homonymPart
= acceptedMatcher
.groupCount() < 2 ?
null : acceptedMatcher
.group(2);
410 handleHomotypicGroup(firstPart
, state
, (BotanicalName
)state
.getCurrentTaxon().getName(), false, homonyms
, homonymPart
, false);
411 }else if(heterotypicMatcher
.matches()){
412 String firstPart
= heterotypicMatcher
.group(1).trim();
413 String secondPart
= heterotypicMatcher
.groupCount() < 3 ?
null : heterotypicMatcher
.group(3);
414 String homonymPart
= heterotypicMatcher
.groupCount() < 4 ?
null : heterotypicMatcher
.group(4);
415 boolean isDoubtful
= firstPart
.matches("^\\?\\s*.*");
416 firstPart
= replaceHomonIlleg(firstPart
);
417 boolean isHomonym
= firstPart
.matches(".*" + HOMONYM_MARKER
);
418 BotanicalName synName
= makeName(firstPart
);
419 if (synName
.isProtectedTitleCache()){
420 logger
.warn(line
+ " heterotypic base synonym could not be parsed correctly:" + firstPart
);
423 homonyms
.add(synName
);
425 SynonymRelationship sr
= state
.getCurrentTaxon().addHeterotypicSynonymName(synName
);
426 sr
.getSynonym().setDoubtful(isDoubtful
);
427 handleHomotypicGroup(secondPart
, state
, synName
, true, homonyms
, homonymPart
, isDoubtful
);
429 logger
.warn(line
+ "Synonym entry does not match: " + synonymStr
);
442 * @param homotypicalGroup
444 private void handleHomotypicGroup(String homotypicStr
,
445 CubaImportState state
,
446 BotanicalName homotypicName
,
447 boolean isHeterotypic
,
448 List
<BotanicalName
> homonyms
,
450 boolean isDoubtful
) {
452 if (homotypicStr
== null){
454 }else if (homotypicStr
.startsWith("(") && homotypicStr
.endsWith("")){
455 homotypicStr
= homotypicStr
.substring(1, homotypicStr
.length() - 1);
458 BotanicalName currentBasionym
= homotypicName
;
459 String
[] splits
= homotypicStr
.split("\\s*,\\s*");
460 for (String split
: splits
){
461 split
= replaceHomonIlleg(split
);
462 boolean isHomonym
= split
.matches(".*" + HOMONYM_MARKER
);
463 BotanicalName newName
= makeName(split
);
464 if (newName
.isProtectedTitleCache()){
465 logger
.warn(state
.getCurrentLine() + ": homotypic name part could not be parsed: " + split
);
468 homonyms
.add(newName
);
471 SynonymRelationship sr
= state
.getCurrentTaxon().addHeterotypicSynonymName(newName
, homotypicName
.getHomotypicalGroup(), null, null);
472 sr
.getSynonym().setDoubtful(isDoubtful
);
473 // newName.addBasionym(homotypicName);
474 currentBasionym
= handleBasionym(currentBasionym
, newName
);
476 state
.getCurrentTaxon().addHomotypicSynonymName(newName
, null, null);
477 handleBasionym(currentBasionym
, newName
);
480 makeHomonyms(homonyms
, homonymPart
, state
, currentBasionym
);
488 private String
replaceHomonIlleg(String split
) {
489 String result
= split
.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
498 * @param currentBasionym
500 private void makeHomonyms(List
<BotanicalName
> homonyms
, String homonymPartOrig
, CubaImportState state
,
501 BotanicalName currentBasionym
) {
502 String line
= state
.getCurrentLine() + ": ";
503 String homonymPart
= homonymPartOrig
== null ?
"" : homonymPartOrig
.trim();
504 if (homonyms
.isEmpty() && homonymPart
.equals("")){
506 }else if (homonymPart
.equals("")){
507 logger
.warn(line
+ "SynonymPart has homonyms but homonymPart is empty");
510 homonymPart
= homonymPart
.substring(1, homonymPart
.length() - 1);
511 String
[] splits
= homonymPart
.split("\\]\\s*\\[");
512 if (splits
.length
!= homonyms
.size()){
513 if(homonyms
.size() == 0 && splits
.length
>= 1){
514 handleSimpleBlockingNames(splits
, state
, currentBasionym
);
516 logger
.warn(line
+ "Number of homonyms (" + homonyms
.size() + ") and homonymParts ("+splits
.length
+") does not match");
521 for (String split
: splits
){
522 split
= split
.replaceAll("^non\\s+", "");
523 BotanicalName newName
= makeName(split
);
524 // BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
525 if (newName
.isProtectedTitleCache()){
526 logger
.warn(state
.getCurrentLine() + ": homonym name could not be parsed: " + split
);
528 newName
.addRelationshipToName(homonyms
.get(i
), NameRelationshipType
.LATER_HOMONYM(), null);
537 * @param currentBasionym
539 private void handleSimpleBlockingNames(String
[] splitsi
, CubaImportState state
,
540 BotanicalName currentBasionym
) {
541 for (String spliti
: splitsi
){
543 String split
= spliti
.replaceAll("^non\\s+", "");
544 BotanicalName newName
= makeName(split
);
545 if (newName
.isProtectedTitleCache()){
546 logger
.warn(state
.getCurrentLine() + ": blocking name could not be parsed: " + split
);
548 Set
<BotanicalName
> typifiedNames
= (Set
)currentBasionym
.getHomotypicalGroup().getTypifiedNames();
549 Set
<BotanicalName
> candidates
= new HashSet
<>();
550 for (BotanicalName name
: typifiedNames
){
551 if (name
.getGenusOrUninomial() != null && name
.getGenusOrUninomial().equals(newName
.getGenusOrUninomial())){
552 if (name
.getStatus().isEmpty() || ! name
.getStatus().iterator().next().getType().equals(NomenclaturalStatusType
.ILLEGITIMATE())){
553 candidates
.add(name
);
557 if (candidates
.size() == 1){
558 newName
.addRelationshipToName(candidates
.iterator().next(), NameRelationshipType
.BLOCKING_NAME_FOR(), null);
560 logger
.warn(state
.getCurrentLine() + ": Blocking name could not be handled. " + candidates
.size() + " candidates.");
568 * @param homotypicName
571 private BotanicalName
handleBasionym(BotanicalName currentBasionym
, BotanicalName name2
) {
572 BotanicalName basionymName
= currentBasionym
;
573 BotanicalName newCombination
= name2
;
574 //switch if necessary
575 if (basionymName
.getBasionymAuthorship() != null && newCombination
.getBasionymAuthorship() == null){
576 basionymName
= name2
;
577 newCombination
= currentBasionym
;
579 // newCombination.getHomotypicalGroup().removeGroupBasionym(xxx);
580 if (matchAuthor(basionymName
.getCombinationAuthorship(), newCombination
.getBasionymAuthorship())){
581 newCombination
.getHomotypicalGroup().setGroupBasionym(basionymName
);
588 * @param combinationAuthorship
592 private boolean matchAuthor(TeamOrPersonBase
<?
> author1
, TeamOrPersonBase
<?
> author2
) {
593 if (author1
== null || author2
== null){
596 return author1
.getNomenclaturalTitle().equals(author2
.getNomenclaturalTitle());
606 private void makeNotes(HashMap
<String
, String
> record
, CubaImportState state
) {
607 String notesStr
= getValue(record
, "(Notas)");
608 if (notesStr
== null){
611 Annotation annotation
= Annotation
.NewDefaultLanguageInstance(notesStr
);
613 annotation
.setAnnotationType(AnnotationType
.TECHNICAL());
614 state
.getCurrentTaxon().addAnnotation(annotation
);
625 private Taxon
makeTaxon(HashMap
<String
, String
> record
, CubaImportState state
, TaxonNode familyNode
, boolean isSynonym
) {
626 String taxonStr
= getValue(record
, "Taxón");
627 if (taxonStr
== null){
628 return isSynonym ? state
.getCurrentTaxon() : null;
630 boolean isAbsent
= false;
631 if (taxonStr
.startsWith("[") && taxonStr
.endsWith("]")){
632 taxonStr
= taxonStr
.substring(1, taxonStr
.length() - 1);
636 BotanicalName botanicalName
= makeName(taxonStr
);
637 Reference
<?
> sec
= getSecReference(state
);
638 Taxon taxon
= Taxon
.NewInstance(botanicalName
, sec
);
639 TaxonNode higherNode
;
640 if (botanicalName
.isProtectedTitleCache()){
641 logger
.warn(state
.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr
);
642 higherNode
= familyNode
;
644 String genusStr
= botanicalName
.getGenusOrUninomial();
645 Taxon genus
= state
.getHigherTaxon(genusStr
);
647 higherNode
= genus
.getTaxonNodes().iterator().next();
649 BotanicalName name
= BotanicalName
.NewInstance(Rank
.GENUS());
650 name
.setGenusOrUninomial(genusStr
);
651 genus
= Taxon
.NewInstance(name
, sec
);
652 higherNode
= familyNode
.addChildTaxon(genus
, null, null);
653 state
.putHigherTaxon(genusStr
, genus
);
657 higherNode
.addChildTaxon(taxon
, null, null);
662 private final String orthVarRegExStr
= "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
663 private final Pattern orthVarRegEx
= Pattern
.compile(orthVarRegExStr
);
668 private BotanicalName
makeName(String nameStrOrig
) {
670 String nameStr
= normalizeStatus(nameStrOrig
);
672 Matcher orthVarMatcher
= orthVarRegEx
.matcher(nameStr
);
673 String orthVar
= null;
674 if (orthVarMatcher
.matches()) {
675 orthVar
= orthVarMatcher
.group(1);
676 nameStr
= nameStr
.replace(" " + orthVar
, "").trim().replaceAll("\\s{2,}", " ");
677 orthVar
= orthVar
.substring(2, orthVar
.length() - 2);
680 BotanicalName result
= (BotanicalName
)nameParser
.parseReferencedName(nameStr
, nc
, Rank
.SPECIES());
681 if (orthVar
!= null){
682 BotanicalName orthVarName
= (BotanicalName
)result
.clone();
684 Reference
<?
> citation
= null;
685 orthVarName
.addRelationshipToName(result
, NameRelationshipType
.ORTHOGRAPHIC_VARIANT(), citation
, null, null);
686 orthVarName
.setSpecificEpithet(orthVar
);
696 private Reference
<?
> getSecReference(CubaImportState state
) {
697 Reference
<?
> result
= state
.getSecReference();
699 result
= ReferenceFactory
.newDatabase();
700 result
.setTitle("Flora of Cuba");
701 state
.setSecReference(result
);
707 private static final String
[] nomStatusStrings
= new String
[]{"nom. cons.", "ined.", "nom. illeg.",
708 "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
713 private String
normalizeStatus(String nameStr
) {
714 if (nameStr
== null){
717 String result
= nameStr
.replaceAll(HOMONYM_MARKER
, "").trim();
718 for (String nomStatusStr
: nomStatusStrings
){
719 nomStatusStr
= " " + nomStatusStr
;
720 if (result
.endsWith(nomStatusStr
)){
721 result
= result
.replace(nomStatusStr
, "," + nomStatusStr
);
724 result
= result
.replaceAll(DOUBTFUL_MARKER
, "").trim();
725 result
= result
.replace("[taxon]", "[infraspec.]");
737 private TaxonNode
getFamilyTaxon(HashMap
<String
, String
> record
, CubaImportState state
) {
738 String familyStr
= getValue(record
, "Fam. default");
739 if (familyStr
== null){
742 Taxon family
= state
.getHigherTaxon(familyStr
);
743 TaxonNode familyNode
;
745 familyNode
= family
.getTaxonNodes().iterator().next();
747 BotanicalName name
= state
.getFamilyName(familyStr
);
749 name
= BotanicalName
.NewInstance(Rank
.FAMILY());
750 name
.setGenusOrUninomial(familyStr
);
751 state
.putFamilyName(familyStr
, name
);
753 Reference
<?
> sec
= getSecReference(state
);
754 Taxon taxon
= Taxon
.NewInstance(name
, sec
);
755 ITaxonTreeNode rootNode
= getClassification(state
);
756 familyNode
= rootNode
.addChildTaxon(taxon
, sec
, null);
757 state
.putHigherTaxon(familyStr
, taxon
);
770 private Taxon
makeAlternativeFamilyTaxon(CubaImportState state
, String famStr
, Reference
<?
> famRef
) {
771 String key
= famRef
.getTitle() + ":"+ famStr
;
772 Taxon family
= state
.getHigherTaxon(key
);
774 BotanicalName name
= state
.getFamilyName(famStr
);
776 name
= BotanicalName
.NewInstance(Rank
.FAMILY());
777 name
.setGenusOrUninomial(famStr
);
778 state
.putFamilyName(famStr
, name
);
780 family
= Taxon
.NewInstance(name
, famRef
);
781 state
.putHigherTaxon(key
, family
);
792 private TaxonNode
getClassification(CubaImportState state
) {
793 Classification classification
= state
.getClassification();
794 if (classification
== null){
795 classification
= getClassificationService().find(state
.getConfig().getClassificationUuid());
797 TaxonNode rootNode
= state
.getRootNode();
798 if (rootNode
== null){
799 rootNode
= getTaxonNodeService().find(plantaeUuid
);
801 if (rootNode
== null){
802 Reference
<?
> sec
= getSecReference(state
);
803 if (classification
== null){
804 String classificationName
= state
.getConfig().getClassificationName();
806 Language language
= Language
.DEFAULT();
807 classification
= Classification
.NewInstance(classificationName
, sec
, language
);
808 state
.setClassification(classification
);
809 classification
.setUuid(state
.getConfig().getClassificationUuid());
810 classification
.getRootNode().setUuid(rootUuid
);
813 BotanicalName plantaeName
= BotanicalName
.NewInstance(Rank
.KINGDOM());
814 plantaeName
.setGenusOrUninomial("Plantae");
815 Taxon plantae
= Taxon
.NewInstance(plantaeName
, sec
);
816 TaxonNode plantaeNode
= classification
.addChildTaxon(plantae
, null, null);
817 plantaeNode
.setUuid(plantaeUuid
);
818 state
.setRootNode(plantaeNode
);
819 getClassificationService().save(classification
);
821 rootNode
= plantaeNode
;
832 private String
getValue(HashMap
<String
, String
> record
, String originalKey
) {
833 String value
= record
.get(originalKey
);
834 if (! StringUtils
.isBlank(value
)) {
835 if (logger
.isDebugEnabled()) { logger
.debug(originalKey
+ ": " + value
); }
836 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
846 * Stores taxa records in DB
// Handles the record returned by state.getOriginalRecord(): checks the column keys,
// resolves the family node, creates the taxon, stores it as current taxon on the state
// and then delegates to the make* helpers for alternative families, notes, synonyms
// and the three groups of distribution columns.
// NOTE(review): several lines between the visible statements are not part of this
// excerpt, so it cannot be confirmed here which of the warning branches return early.
849 protected void firstPass(CubaImportState state
) {
// Passed on to makeTaxon and used in the null check on its result.
// NOTE(review): no other assignment to this flag is visible in this excerpt.
850 boolean isSynonym
= false;
// Prefix for all log messages of this method: state.getCurrentLine() followed by ": ".
852 String line
= state
.getCurrentLine() + ": ";
853 HashMap
<String
, String
> record
= state
.getOriginalRecord();
// Warn about every column key that is not contained in expectedKeys.
855 Set
<String
> keys
= record
.keySet();
856 for (String key
: keys
) {
857 if (! expectedKeys
.contains(key
)){
858 logger
.warn(line
+ "Unexpected Key: " + key
);
// Detects a record with exactly two keys where "Nat" and "Adv" are filled while
// "Fam. default" and "Syn." are empty.
862 if (record
.get("Fam. default") == null && keys
.size() == 2 && record
.get("Syn.") == null && record
.get("Nat") != null && record
.get("Adv") != null){
863 //second header line, don't handle
// Family node for this record as resolved by getFamilyTaxon; may be null.
868 TaxonNode familyTaxon
= getFamilyTaxon(record
, state
);
// Without a family node the record is only reported: differently depending on whether
// a "Taxón" value exists or the "Syn." value is missing as well.
869 if (familyTaxon
== null){
870 if (record
.get("Taxón") != null){
871 logger
.warn(line
+ "Family not recognized but taxon exists: " + record
.get("Taxón"));
873 }else if (record
.get("Syn.") == null){
874 logger
.warn(line
+ "Family not recognized but also no synonym exists");
// Create the taxon; a null result is only reported when isSynonym is false.
884 Taxon taxon
= makeTaxon(record
, state
, familyTaxon
, isSynonym
);
885 if (taxon
== null && ! isSynonym
){
886 logger
.warn(line
+ "taxon could not be created and is null");
// The helpers called below that only receive state read the taxon from there
// (see state.getCurrentTaxon() in makeSingleProvinceDistribution).
889 state
.setCurrentTaxon(taxon
);
892 makeAlternativeFamilies(record
, state
, familyTaxon
, taxon
);
895 makeNotes(record
, state
);
898 makeSynonyms(record
, state
);
900 //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
901 makeCubanDistribution(record
, state
);
904 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
905 // "CuC","VC","Ci","SS","CA","Cam","LT",
906 // "CuE","Gr","Ho","SC","Gu",
907 makeProvincesDistribution(record
, state
);
909 // "Esp","Ja","PR","Men","Bah","Cay",
910 // "AmN","AmC","AmS","VM"});
911 makeOtherAreasDistribution(record
, state
);
// Reset the highest status kept on the state; makeProvinceStatus reads it via
// state.getHighestStatusForTaxon().
914 state
.setHighestStatusForTaxon(null);
// Reads the three alternative family columns ("Fam. FRC", "Fam. A&S", "Fam. FC") of the
// record and hands each of them, together with its reference, to makeSingleAlternativeFamily.
// NOTE(review): the last line of the parameter list is not part of this excerpt; the body
// uses a variable named taxon which is presumably declared there - confirm.
927 private void makeAlternativeFamilies(HashMap
<String
, String
> record
,
928 CubaImportState state
,
// NOTE(review): familyTaxon is not used in any of the visible lines of this method.
929 TaxonNode familyTaxon
,
// The cells are read directly with record.get, i.e. without the normalization done by getValue.
932 String famFRC
= record
.get("Fam. FRC");
933 String famAS
= record
.get("Fam. A&S");
934 String famFC
= record
.get("Fam. FC");
// One reference per column, identified by the uuid constants of CubaTransformer.
936 Reference
<?
> refFRC
= makeReference(state
, CubaTransformer
.uuidRefFRC
);
937 Reference
<?
> refAS
= makeReference(state
, CubaTransformer
.uuidRefAS
);
938 Reference
<?
> refFC
= makeReference(state
, CubaTransformer
.uuidRefFC
);
// Blank family strings are checked for inside makeSingleAlternativeFamily (isBlank).
940 makeSingleAlternativeFamily(state
, taxon
, famFRC
, refFRC
);
941 makeSingleAlternativeFamily(state
, taxon
, famAS
, refAS
);
942 makeSingleAlternativeFamily(state
, taxon
, famFC
, refFC
);
951 private Reference
<?
> makeReference(CubaImportState state
, UUID uuidRef
) {
952 Reference
<?
> ref
= state
.getReference(uuidRef
);
954 ref
= getReferenceService().find(uuidRef
);
955 state
.putReference(uuidRef
, ref
);
// Attaches one alternative family (famStr, as used in reference famRef) to the taxon.
// The visible code records the same information in three ways: as a TextData element whose
// source carries the family name, as a TaxonInteraction pointing to the family taxon, and as
// an INCLUDES taxon relationship from the family taxon to the taxon.
// NOTE(review): the declarations of altFamUuid1/altFamUuid2, the opening "try" and some
// further lines are not part of this excerpt.
967 private void makeSingleAlternativeFamily(CubaImportState state
, Taxon taxon
, String famStr
, Reference
<?
> famRef
) {
// Guard for records without a value in this family column.
968 if (isBlank(famStr
)){
// Description of the taxon that receives the two description elements created below.
972 TaxonDescription desc
= getTaxonDescription(taxon
, false, true);
// Feature uuids are resolved through the transformer by the keys "Alt.Fam." and "Alt.Fam.2".
977 altFamUuid1
= state
.getTransformer().getFeatureUuid("Alt.Fam.");
978 altFamUuid2
= state
.getTransformer().getFeatureUuid("Alt.Fam.2");
// The checked transformer exception is rethrown unchecked with the original as cause.
979 } catch (UndefinedTransformerMethodException e
) {
980 throw new RuntimeException(e
);
// Taxon representing the alternative family as built by makeAlternativeFamilyTaxon.
984 Taxon famTaxon
= makeAlternativeFamilyTaxon(state
, famStr
, famRef
);
// Variant 1: text data without text; the family is only carried by the source
// (reference famRef plus the name of the family taxon).
988 Feature feature1
= getFeature(state
, altFamUuid1
, "Family in other floras", "Family in other floras", "Other floras", null);
989 // TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
990 TextData textData
= TextData
.NewInstance(feature1
, null, Language
.DEFAULT(), null);
991 textData
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null,null, famRef
, null, famTaxon
.getName(),null);
992 desc
.addElement(textData
);
// Variant 2: taxon interaction with the family taxon as second taxon, sourced by famRef.
997 Feature feature2
= getFeature(state
, altFamUuid2
, "Family in other floras(2)", "Family in other floras(2)", "Other floras(2)", null);
998 feature2
.setSupportsTaxonInteraction(true);
999 TaxonInteraction taxInteract
= TaxonInteraction
.NewInstance(feature2
);
1000 taxInteract
.setTaxon2(famTaxon
);
1001 taxInteract
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null,null, famRef
, null);
1002 desc
.addElement(taxInteract
);
// Variant 3: taxon relationship of type INCLUDES from the family taxon to the taxon,
// cited with the sec reference of the taxon.
// NOTE(review): taxon is dereferenced here without a null check; firstPass only warns
// about a null taxon when isSynonym is false - confirm that null cannot reach this line.
1005 famTaxon
.addTaxonRelation(taxon
, TaxonRelationshipType
.INCLUDES(), taxon
.getSec(), null);
1018 // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1019 // "CuC","VC","Ci","SS","CA","Cam","LT",
1020 // "CuE","Gr","Ho","SC","Gu",
1021 private void makeProvincesDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
1022 List
<String
> areaKeys
= Arrays
.asList(new String
[]{
1023 "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1024 "CuC","VC","Ci","SS","CA","Cam","LT",
1025 "CuE","Gr","Ho","SC","Gu",
1027 for (String areaKey
: areaKeys
){
1028 state
.setCubanProvince(true);
1029 makeSingleProvinceDistribution(areaKey
, record
, state
);
1033 private void makeOtherAreasDistribution(HashMap
<String
, String
> record
, CubaImportState state
) {
1034 List
<String
> areaKeys
= Arrays
.asList(new String
[]{
1035 "Esp","Ja","PR","Men","Bah","Cay",
1036 "AmN","AmC","AmS","VM"});
1037 for (String areaKey
: areaKeys
){
1038 state
.setCubanProvince(false);
1039 makeSingleProvinceDistribution(areaKey
, record
, state
);
// Creates a Distribution for the area column areaKey of the record and adds it to the
// description of the current taxon (state.getCurrentTaxon()).
// NOTE(review): the opening "try", the closing braces and some guard lines are not part of
// this excerpt; in particular it is not visible whether the "area not recognized" branches
// leave the method.
1050 private void makeSingleProvinceDistribution(String areaKey
,
1051 HashMap
<String
, String
> record
,
1052 CubaImportState state
) {
// The column key is mapped to a named area uuid by the transformer.
1054 UUID areaUuid
= state
.getTransformer().getNamedAreaUuid(areaKey
);
1055 if (areaUuid
== null){
1056 logger
.warn("Area not recognized: " + areaKey
);
// An empty cell means there is nothing to record for this area.
1059 if (record
.get(areaKey
)==null){
1060 return; //no status defined
1063 NamedArea area
= getNamedArea(state
, areaUuid
, null, null, null, null, null);
// NOTE(review): this message concatenates the area object itself; if the (not visible)
// guard in front of it tests for area == null the message always ends with "null" -
// areaKey would presumably be more helpful. Confirm.
1065 logger
.warn(state
.getCurrentLine() + ": Area not recognized: " + area
);
1067 TaxonDescription desc
= getTaxonDescription(state
.getCurrentTaxon(), false, true);
// The status term is derived from the cell content by makeProvinceStatus.
1068 PresenceAbsenceTerm status
= makeProvinceStatus(areaKey
, record
, state
);
1069 if (status
== null){
1070 logger
.warn(state
.getCurrentLine() + ": Province distribution status could not be defined: " + record
.get(areaKey
));
1072 Distribution distribution
= Distribution
.NewInstance(area
, status
);
1073 desc
.addElement(distribution
);
// NOTE(review): the transformer exception is only printed to stderr and not reported via
// the logger used everywhere else in this method; consider logging or rethrowing as done
// in makeSingleAlternativeFamily.
1074 } catch (UndefinedTransformerMethodException e
) {
1075 e
.printStackTrace();
1085 * @param highestStatus
1087 * @throws UndefinedTransformerMethodException
1089 private PresenceAbsenceTerm
makeProvinceStatus(String areaKey
,
1090 HashMap
<String
, String
> record
,
1091 CubaImportState state
) throws UndefinedTransformerMethodException
{
1093 String statusStr
= record
.get(areaKey
);
1094 if (statusStr
== null){
1097 PresenceAbsenceTerm status
= state
.getTransformer().getPresenceTermByKey(statusStr
);
1098 if (status
== null){
1099 PresenceAbsenceTerm highestStatus
= state
.getHighestStatusForTaxon();
1100 if (state
.isCubanProvince() && isMinus(statusStr
)){
1101 getAbsenceTermForStatus(state
, highestStatus
);
1102 }else if (! state
.isCubanProvince() && isMinus(statusStr
)){
1103 status
= state
.getTransformer().getPresenceTermByKey("--");
1105 UUID statusUuid
= state
.getTransformer().getPresenceTermUuid(statusStr
);
1106 status
= getPresenceTerm(state
, statusUuid
, null, null, null, false);
1115 * @param highestStatus
1116 * @throws UndefinedTransformerMethodException
// Maps the given highest status to its absence counterpart: the term registered under the
// same key prefixed with "-" ("E" -> "-E", "N" -> "-N", ...). The branches are tested in the
// order E, Ind., Ind.?, N, P, A, C; the first match decides.
// NOTE(review): the lines following the null warning and the final return are not part of
// this excerpt; confirm that a null highestStatus leaves the method before it is dereferenced.
1118 private PresenceAbsenceTerm
getAbsenceTermForStatus(CubaImportState state
, PresenceAbsenceTerm highestStatus
) throws UndefinedTransformerMethodException
{
1119 if (highestStatus
== null){
1120 logger
.warn(state
.getCurrentLine() + ": Highest status not defined");
// Stays null when none of the branches below matches.
1123 PresenceAbsenceTerm result
= null;
1124 if (highestStatus
.equals(getStatus(state
, "E"))){
1125 result
= getStatus(state
, "-E");
// Unlike the other branches "Ind." is compared by uuid (no getStatus call) and additionally
// matches the NATIVE term of PresenceAbsenceTerm.
1126 }else if (highestStatus
.getUuid().equals(state
.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus
.equals(PresenceAbsenceTerm
.NATIVE())){
1127 result
= getStatus(state
, "-Ind.");
1128 }else if (highestStatus
.equals(getStatus(state
, "Ind.?"))){
1129 result
= getStatus(state
, "-Ind.?"); //TODO
1130 }else if (highestStatus
.equals(getStatus(state
, "N"))){
1131 result
= getStatus(state
, "-N");
1132 }else if (highestStatus
.equals(getStatus(state
, "P"))){
1133 result
= getStatus(state
, "-P");
1134 }else if (highestStatus
.equals(getStatus(state
, "A"))){
1135 result
= getStatus(state
, "-A");
1136 }else if (highestStatus
.equals(getStatus(state
, "C"))){
1137 result
= getStatus(state
, "-C");
// Reported when the highest status is none of the statuses handled above.
1139 logger
.warn(state
.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus
.getTitleCache());
1147 * @throws UndefinedTransformerMethodException
1149 private PresenceAbsenceTerm
getStatus(CubaImportState state
, String key
) throws UndefinedTransformerMethodException
{
1150 PresenceAbsenceTerm status
= state
.getTransformer().getPresenceTermByKey(key
);
1151 if (status
== null){
1152 UUID statusUuid
= state
.getTransformer().getPresenceTermUuid(key
);
1153 status
= getPresenceTerm(state
, statusUuid
, null, null, null, false);
1160 * Stores parent-child, synonym and common name relationships
// Second pass over the current record.
// NOTE(review): the only visible line of the body is commented-out code mentioning
// "CyprusRow"; the remaining lines of the method are not part of this excerpt, so it
// cannot be confirmed here whether this pass does any work.
1163 protected void secondPass(CubaImportState state
) {
1164 // CyprusRow cyprusRow = state.getCyprusRow();
1170 protected boolean isIgnore(CubaImportState state
) {
1171 return ! state
.getConfig().isDoTaxa();
1175 protected boolean doCheck(CubaImportState state
) {
1176 logger
.warn("DoCheck not yet implemented for CubaExcelImport");