1 |
ede5c502
|
Andreas Müller
|
/**
|
2 |
|
|
* Copyright (C) 2007 EDIT
|
3 |
|
|
* European Distributed Institute of Taxonomy
|
4 |
|
|
* http://www.e-taxonomy.eu
|
5 |
|
|
*
|
6 |
|
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7 |
|
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8 |
|
|
*/
|
9 |
|
|
|
10 |
|
|
package eu.etaxonomy.cdm.io.cuba;
|
11 |
|
|
|
12 |
fb3dec85
|
Andreas Müller
|
import java.util.ArrayList;
|
13 |
ede5c502
|
Andreas Müller
|
import java.util.Arrays;
|
14 |
|
|
import java.util.HashMap;
|
15 |
c9f78619
|
Andreas Müller
|
import java.util.HashSet;
|
16 |
ede5c502
|
Andreas Müller
|
import java.util.List;
|
17 |
|
|
import java.util.Set;
|
18 |
|
|
import java.util.UUID;
|
19 |
fb3dec85
|
Andreas Müller
|
import java.util.regex.Matcher;
|
20 |
|
|
import java.util.regex.Pattern;
|
21 |
ede5c502
|
Andreas Müller
|
|
22 |
|
|
import org.apache.commons.lang.StringUtils;
|
23 |
|
|
import org.apache.log4j.Logger;
|
24 |
|
|
import org.springframework.stereotype.Component;
|
25 |
|
|
|
26 |
|
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
27 |
|
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
28 |
|
|
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
|
29 |
b9cdcc88
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.agent.Person;
|
30 |
fb3dec85
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.agent.Team;
|
31 |
|
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
32 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
33 |
|
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
34 |
b9cdcc88
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
35 |
|
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
36 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.common.Language;
|
37 |
c9f78619
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
|
38 |
4b9c9c4b
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.common.Representation;
|
39 |
b9cdcc88
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
40 |
|
|
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
|
41 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
42 |
c9f78619
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.description.Feature;
|
43 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
44 |
|
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
45 |
c9f78619
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.description.TaxonInteraction;
|
46 |
|
|
import eu.etaxonomy.cdm.model.description.TextData;
|
47 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
48 |
b9cdcc88
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
|
49 |
7d882578
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.IBotanicalName;
|
50 |
b9cdcc88
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.NameRelationship;
|
51 |
fb3dec85
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
|
52 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
53 |
fb3dec85
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
|
54 |
|
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
55 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.Rank;
|
56 |
86536e03
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
57 |
ded3de15
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
58 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
59 |
fb3dec85
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
60 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
61 |
fb3dec85
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
|
62 |
3ef2e1bd
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
63 |
|
|
import eu.etaxonomy.cdm.model.taxon.SynonymType;
|
64 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
65 |
|
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
66 |
c9f78619
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
|
67 |
6af76d56
|
Andreas Müller
|
import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
|
68 |
ede5c502
|
Andreas Müller
|
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
|
69 |
|
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
70 |
|
|
|
71 |
|
|
/**
|
72 |
|
|
* @author a.mueller
|
73 |
|
|
* @created 05.01.2016
|
74 |
|
|
*/
|
75 |
|
|
|
76 |
|
|
@Component
|
77 |
|
|
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
|
78 |
|
|
private static final long serialVersionUID = -747486709409732371L;
|
79 |
|
|
private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
|
80 |
|
|
|
81 |
5cdaf78e
|
Andreas Müller
|
private static final String HOMONYM_MARKER = "\\s+homon.?$";
|
82 |
fb3dec85
|
Andreas Müller
|
private static final String DOUBTFUL_MARKER = "^\\?\\s?";
|
83 |
|
|
|
84 |
|
|
|
85 |
|
|
private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
|
86 |
|
|
private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
|
87 |
|
|
|
88 |
ede5c502
|
Andreas Müller
|
private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
|
89 |
|
|
private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
|
90 |
|
|
|
91 |
c9f78619
|
Andreas Müller
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
92 |
|
|
"Fam. default","Fam. FRC","Fam. A&S","Fam. FC",
|
93 |
|
|
"Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
|
94 |
ede5c502
|
Andreas Müller
|
|
95 |
|
|
@Override
|
96 |
|
|
protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
|
97 |
fb3dec85
|
Andreas Müller
|
//we do everything in firstPass here
|
98 |
ede5c502
|
Andreas Müller
|
return;
|
99 |
|
|
}
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
/**
|
103 |
|
|
* @param record
|
104 |
|
|
* @param state
|
105 |
|
|
* @param taxon
|
106 |
|
|
*/
|
107 |
fb3dec85
|
Andreas Müller
|
private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
|
108 |
ede5c502
|
Andreas Müller
|
try {
|
109 |
4b9c9c4b
|
Andreas Müller
|
NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("Cu"), null, null, null, null, null);
|
110 |
fb3dec85
|
Andreas Müller
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
111 |
0a6a64c9
|
Andreas Müller
|
List<PresenceAbsenceTerm> statuss = makeCubanStatuss(record, state);
|
112 |
fb3dec85
|
Andreas Müller
|
for (PresenceAbsenceTerm status : statuss){
|
113 |
|
|
Distribution distribution = Distribution.NewInstance(cuba, status);
|
114 |
|
|
desc.addElement(distribution);
|
115 |
b9cdcc88
|
Andreas Müller
|
distribution.addSource(makeDescriptionSource(state));
|
116 |
fb3dec85
|
Andreas Müller
|
}
|
117 |
ede5c502
|
Andreas Müller
|
} catch (UndefinedTransformerMethodException e) {
|
118 |
|
|
e.printStackTrace();
|
119 |
|
|
}
|
120 |
|
|
}
|
121 |
|
|
|
122 |
|
|
|
123 |
|
|
/**
|
124 |
|
|
* @param record
|
125 |
|
|
* @param state
|
126 |
|
|
* @return
|
127 |
|
|
* @throws UndefinedTransformerMethodException
|
128 |
|
|
*/
|
129 |
0a6a64c9
|
Andreas Müller
|
private List<PresenceAbsenceTerm> makeCubanStatuss(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
|
130 |
|
|
PresenceAbsenceTerm highestStatus = null;
|
131 |
ede5c502
|
Andreas Müller
|
|
132 |
fb3dec85
|
Andreas Müller
|
String line = state.getCurrentLine() + ": ";
|
133 |
|
|
List<PresenceAbsenceTerm> result = new ArrayList<>();
|
134 |
|
|
|
135 |
ede5c502
|
Andreas Müller
|
String endemicStr = getValue(record, "End");
|
136 |
|
|
String indigenousStr = getValue(record, "Ind");
|
137 |
|
|
String indigenousDoubtStr = getValue(record, "Ind? D");
|
138 |
|
|
String naturalisedStr = getValue(record, "Nat");
|
139 |
|
|
String dudStr = getValue(record, "Dud P");
|
140 |
|
|
String advStr = getValue(record, "Adv");
|
141 |
|
|
String cultStr = getValue(record, "Cult C");
|
142 |
|
|
|
143 |
b9cdcc88
|
Andreas Müller
|
state.setEndemic(false);
|
144 |
|
|
|
145 |
ede5c502
|
Andreas Müller
|
if (endemicStr != null){
|
146 |
fb3dec85
|
Andreas Müller
|
if(endemicStr.equals("+")){
|
147 |
|
|
PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
|
148 |
|
|
result.add(endemicState);
|
149 |
0a6a64c9
|
Andreas Müller
|
highestStatus = endemicState;
|
150 |
b9cdcc88
|
Andreas Müller
|
state.setEndemic(true);
|
151 |
fb3dec85
|
Andreas Müller
|
}else if(isMinus(endemicStr)){
|
152 |
|
|
UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
|
153 |
|
|
PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
|
154 |
|
|
result.add(endemicState);
|
155 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "endemic", false);
|
156 |
c9f78619
|
Andreas Müller
|
}else if(endemicStr.equals("?")){
|
157 |
|
|
UUID endemicDoubtfulUuid = state.getTransformer().getPresenceTermUuid("?E");
|
158 |
|
|
PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicDoubtfulUuid, null, null, null, false);
|
159 |
|
|
result.add(endemicState);
|
160 |
|
|
checkAbsentHighestState(highestStatus, line, "endemic", false);
|
161 |
ede5c502
|
Andreas Müller
|
}else{
|
162 |
fb3dec85
|
Andreas Müller
|
logger.warn(line + "Endemic not recognized: " + endemicStr);
|
163 |
ede5c502
|
Andreas Müller
|
}
|
164 |
fb3dec85
|
Andreas Müller
|
}
|
165 |
|
|
if (indigenousStr != null){
|
166 |
|
|
if(indigenousStr.equals("+")){
|
167 |
b9cdcc88
|
Andreas Müller
|
PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("Ind.");
|
168 |
|
|
// PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
|
169 |
fb3dec85
|
Andreas Müller
|
result.add(indigenousState);
|
170 |
0a6a64c9
|
Andreas Müller
|
highestStatus = highestStatus != null ? highestStatus : indigenousState;
|
171 |
fb3dec85
|
Andreas Müller
|
}else if(isMinus(indigenousStr)){
|
172 |
0a6a64c9
|
Andreas Müller
|
PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("-Ind.");
|
173 |
|
|
result.add(indigenousState);
|
174 |
|
|
checkAbsentHighestState(highestStatus, line, "indigenous", false);
|
175 |
fb3dec85
|
Andreas Müller
|
}else if(indigenousStr.equals("?")){
|
176 |
b9cdcc88
|
Andreas Müller
|
PresenceAbsenceTerm indigenousDoubtState = state.getTransformer().getPresenceTermByKey("?Ind.");
|
177 |
|
|
// PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
|
178 |
fb3dec85
|
Andreas Müller
|
result.add(indigenousDoubtState);
|
179 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "indigenous", true);
|
180 |
ede5c502
|
Andreas Müller
|
}else{
|
181 |
fb3dec85
|
Andreas Müller
|
logger.warn(line + "Indigenous not recognized: " + indigenousStr);
|
182 |
|
|
}
|
183 |
|
|
}
|
184 |
|
|
if(indigenousDoubtStr != null){
|
185 |
|
|
if(indigenousDoubtStr.equals("D")){
|
186 |
b9cdcc88
|
Andreas Müller
|
PresenceAbsenceTerm doubtIndigenousState = state.getTransformer().getPresenceTermByKey("Ind.?");
|
187 |
|
|
// PresenceAbsenceTerm doubtIndigenousState = getPresenceTerm(state, doubtIndigenousUuid, null, null, null, false);
|
188 |
|
|
result.add(doubtIndigenousState);
|
189 |
|
|
highestStatus = highestStatus != null ? highestStatus : doubtIndigenousState;
|
190 |
|
|
}else if(isMinus(indigenousDoubtStr)){
|
191 |
|
|
UUID doubtIndigenousErrorUuid = state.getTransformer().getPresenceTermUuid("-Ind.?");
|
192 |
|
|
PresenceAbsenceTerm doubtIndigenousErrorState = getPresenceTerm(state, doubtIndigenousErrorUuid, null, null, null, false);
|
193 |
|
|
result.add(doubtIndigenousErrorState);
|
194 |
|
|
checkAbsentHighestState(highestStatus, line, "doubtfully indigenous", true);
|
195 |
fb3dec85
|
Andreas Müller
|
}else{
|
196 |
b9cdcc88
|
Andreas Müller
|
logger.warn(line + "doubtfully indigenous not recognized: " + indigenousDoubtStr);
|
197 |
fb3dec85
|
Andreas Müller
|
}
|
198 |
|
|
}
|
199 |
|
|
if(naturalisedStr != null){
|
200 |
|
|
if(naturalisedStr.equals("N")){
|
201 |
0a6a64c9
|
Andreas Müller
|
PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
|
202 |
|
|
result.add(haturalizedState);
|
203 |
|
|
highestStatus = highestStatus != null ? highestStatus : haturalizedState;
|
204 |
fb3dec85
|
Andreas Müller
|
}else if(isMinus(naturalisedStr)){
|
205 |
|
|
UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
|
206 |
|
|
PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
|
207 |
|
|
result.add(naturalisedErrorState);
|
208 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "naturalized", false);
|
209 |
fb3dec85
|
Andreas Müller
|
}else if(naturalisedStr.equals("?")){
|
210 |
|
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
|
211 |
|
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
212 |
|
|
result.add(naturalisedDoubtState);
|
213 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "naturalized", true);
|
214 |
fb3dec85
|
Andreas Müller
|
}else{
|
215 |
|
|
logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
|
216 |
|
|
}
|
217 |
|
|
}
|
218 |
|
|
if(dudStr != null){
|
219 |
|
|
if(dudStr.equals("P")){
|
220 |
|
|
UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
|
221 |
|
|
PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
|
222 |
|
|
result.add(dudState);
|
223 |
0a6a64c9
|
Andreas Müller
|
highestStatus = highestStatus != null ? highestStatus : dudState;
|
224 |
fb3dec85
|
Andreas Müller
|
}else if(isMinus(dudStr)){
|
225 |
|
|
UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
|
226 |
|
|
PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
|
227 |
|
|
result.add(nonNativeErrorState);
|
228 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", false);
|
229 |
fb3dec85
|
Andreas Müller
|
}else if(dudStr.equals("?")){
|
230 |
|
|
UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
|
231 |
|
|
PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
|
232 |
|
|
result.add(naturalisedDoubtState);
|
233 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", true);
|
234 |
fb3dec85
|
Andreas Müller
|
}else{
|
235 |
|
|
logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
|
236 |
|
|
}
|
237 |
|
|
}
|
238 |
|
|
if(advStr != null){
|
239 |
|
|
if(advStr.equals("A")){
|
240 |
b9cdcc88
|
Andreas Müller
|
PresenceAbsenceTerm advState = state.getTransformer().getPresenceTermByKey("Adv.");
|
241 |
|
|
// PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
242 |
fb3dec85
|
Andreas Müller
|
result.add(advState);
|
243 |
0a6a64c9
|
Andreas Müller
|
highestStatus = highestStatus != null ? highestStatus : advState;
|
244 |
fb3dec85
|
Andreas Müller
|
}else if(isMinus(advStr)){
|
245 |
|
|
UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
|
246 |
|
|
PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
|
247 |
|
|
result.add(advState);
|
248 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "adventive", false);
|
249 |
b9cdcc88
|
Andreas Müller
|
}else if(advStr.equals("(A)")){
|
250 |
|
|
UUID rareCasualUuid = state.getTransformer().getPresenceTermUuid("(A)");
|
251 |
|
|
PresenceAbsenceTerm rareCasual = getPresenceTerm(state, rareCasualUuid, null, null, null, false);
|
252 |
|
|
result.add(rareCasual);
|
253 |
fb3dec85
|
Andreas Müller
|
}else{
|
254 |
|
|
logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
|
255 |
|
|
}
|
256 |
|
|
}else if(cultStr != null){
|
257 |
|
|
if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
|
258 |
|
|
logger.warn("'cultivated' not recognized: " + cultStr);
|
259 |
|
|
}else if(cultStr.equals("C")){
|
260 |
|
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
|
261 |
|
|
result.add(cultivatedState);
|
262 |
0a6a64c9
|
Andreas Müller
|
highestStatus = highestStatus != null ? highestStatus : cultivatedState;
|
263 |
fb3dec85
|
Andreas Müller
|
}else if(cultStr.equals("?")){
|
264 |
|
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
|
265 |
|
|
result.add(cultivatedState);
|
266 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "cultivated", true);
|
267 |
fb3dec85
|
Andreas Müller
|
}else if(cultStr.equals("(C)")){
|
268 |
|
|
UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
|
269 |
|
|
PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
|
270 |
|
|
result.add(cultivatedState);
|
271 |
|
|
}else if(isMinus(cultStr)){
|
272 |
|
|
PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
|
273 |
|
|
result.add(cultivatedState);
|
274 |
0a6a64c9
|
Andreas Müller
|
checkAbsentHighestState(highestStatus, line, "cultivated", false);
|
275 |
fb3dec85
|
Andreas Müller
|
}else{
|
276 |
|
|
logger.warn(line + "'cultivated' not recognized: " + cultStr);
|
277 |
ede5c502
|
Andreas Müller
|
}
|
278 |
|
|
}
|
279 |
0a6a64c9
|
Andreas Müller
|
state.setHighestStatusForTaxon(highestStatus);
|
280 |
fb3dec85
|
Andreas Müller
|
return result;
|
281 |
|
|
}
|
282 |
|
|
|
283 |
|
|
|
284 |
0a6a64c9
|
Andreas Müller
|
/**
|
285 |
|
|
* @param highestStatus
|
286 |
|
|
* @param line
|
287 |
|
|
*/
|
288 |
|
|
private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus, String line, String stateLabel, boolean doubtful) {
|
289 |
b9cdcc88
|
Andreas Müller
|
//can be removed, highest status is not used anymore
|
290 |
0a6a64c9
|
Andreas Müller
|
if (highestStatus == null){
|
291 |
|
|
String absentStr = doubtful ? "doubtful" : "absent";
|
292 |
b9cdcc88
|
Andreas Müller
|
logger.info(line + "Highest cuban state is " + absentStr + " " + stateLabel);
|
293 |
0a6a64c9
|
Andreas Müller
|
}
|
294 |
|
|
|
295 |
|
|
}
|
296 |
|
|
|
297 |
|
|
|
298 |
fb3dec85
|
Andreas Müller
|
/**
|
299 |
|
|
* @param indigenousStr
|
300 |
|
|
* @return
|
301 |
|
|
*/
|
302 |
|
|
private boolean isMinus(String str) {
|
303 |
b9cdcc88
|
Andreas Müller
|
return str.equals("-") || str.equals("–") || str.equals("‒");
|
304 |
ede5c502
|
Andreas Müller
|
}
|
305 |
|
|
|
306 |
|
|
|
307 |
|
|
/**
|
308 |
|
|
* @param indigenousStr
|
309 |
|
|
* @return
|
310 |
|
|
*/
|
311 |
fb3dec85
|
Andreas Müller
|
private boolean checkPlusMinusDoubt(String str) {
|
312 |
|
|
return str.equals("+") || isMinus(str)|| str.equals("?");
|
313 |
ede5c502
|
Andreas Müller
|
}
|
314 |
|
|
|
315 |
|
|
|
316 |
|
|
/**
|
317 |
|
|
* @param indigenousStr
|
318 |
|
|
* @param indigenousDoubtStr
|
319 |
|
|
* @param naturalisedStr
|
320 |
|
|
* @param dudStr
|
321 |
|
|
* @param advStr
|
322 |
|
|
* @param cultStr
|
323 |
|
|
*/
|
324 |
|
|
private boolean checkAllNull(String ... others) {
|
325 |
|
|
for (String other : others){
|
326 |
|
|
if (other != null){
|
327 |
|
|
return false;
|
328 |
|
|
}
|
329 |
|
|
}
|
330 |
|
|
return true;
|
331 |
|
|
}
|
332 |
|
|
|
333 |
|
|
|
334 |
fb3dec85
|
Andreas Müller
|
private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
|
335 |
|
|
// String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
|
336 |
|
|
// + "(\\((.{6,})\\))?";
|
337 |
|
|
private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
|
338 |
|
|
+"(\\((.{6,})\\))?";
|
339 |
|
|
private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
|
340 |
|
|
+"(\\((.{6,})\\))?";
|
341 |
b0b606e6
|
Andreas Müller
|
private static final String auctRegExStr = "auct\\."
|
342 |
b9cdcc88
|
Andreas Müller
|
+"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.(\\sFC-S|\\sA&S)?|\\sWright"
|
343 |
c9f78619
|
Andreas Müller
|
+ "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
|
344 |
|
|
+ "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
|
345 |
|
|
|
346 |
|
|
|
347 |
5cdaf78e
|
Andreas Müller
|
private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
|
348 |
c9f78619
|
Andreas Müller
|
private static final String sphalmRegExStr = "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
|
349 |
|
|
private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
|
350 |
fb3dec85
|
Andreas Müller
|
private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
|
351 |
|
|
|
352 |
|
|
private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
|
353 |
|
|
private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
354 |
|
|
private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
|
355 |
|
|
private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
|
356 |
c9f78619
|
Andreas Müller
|
private static final Pattern sphalmRegEx = Pattern.compile(sphalmRegExStr);
|
357 |
fb3dec85
|
Andreas Müller
|
|
358 |
ede5c502
|
Andreas Müller
|
/**
|
359 |
|
|
* @param record
|
360 |
|
|
* @param state
|
361 |
|
|
* @param taxon
|
362 |
|
|
*/
|
363 |
b9cdcc88
|
Andreas Müller
|
private void makeSynonyms(HashMap<String, String> record, CubaImportState state, boolean isFirstSynonym) {
|
364 |
fb3dec85
|
Andreas Müller
|
// boolean forAccepted = true;
|
365 |
|
|
String synonymStr = record.get("Syn.");
|
366 |
|
|
String line = state.getCurrentLine() + ": ";
|
367 |
|
|
|
368 |
b9cdcc88
|
Andreas Müller
|
|
369 |
fb3dec85
|
Andreas Müller
|
if (synonymStr == null){
|
370 |
|
|
//TODO test that this is not a synonym only line
|
371 |
|
|
return;
|
372 |
|
|
}
|
373 |
b9cdcc88
|
Andreas Müller
|
|
374 |
|
|
if (state.getCurrentTaxon() == null){
|
375 |
|
|
logger.error(line + "Current taxon is null for synonym");
|
376 |
|
|
return;
|
377 |
|
|
}
|
378 |
|
|
|
379 |
|
|
|
380 |
fb3dec85
|
Andreas Müller
|
synonymStr = synonymStr.trim();
|
381 |
b9cdcc88
|
Andreas Müller
|
synonymStr = synonymStr.replace("[taxon]", "[infraspec.]");
|
382 |
fb3dec85
|
Andreas Müller
|
|
383 |
|
|
// String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
|
384 |
|
|
// String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
|
385 |
|
|
|
386 |
|
|
// Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
|
387 |
|
|
|
388 |
b9cdcc88
|
Andreas Müller
|
|
389 |
fb3dec85
|
Andreas Müller
|
Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
|
390 |
|
|
Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
|
391 |
|
|
Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
|
392 |
|
|
Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
|
393 |
c9f78619
|
Andreas Müller
|
Matcher sphalmMatcher = sphalmRegEx.matcher(synonymStr);
|
394 |
fb3dec85
|
Andreas Müller
|
|
395 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> homonyms = new ArrayList<>();
|
396 |
fb3dec85
|
Andreas Müller
|
if (missapliedMatcher.matches()){
|
397 |
5cdaf78e
|
Andreas Müller
|
boolean doubtful = missapliedMatcher.group(1) != null;
|
398 |
|
|
String firstPart = missapliedMatcher.group(2);
|
399 |
86536e03
|
Andreas Müller
|
IBotanicalName name = (IBotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
400 |
b9cdcc88
|
Andreas Müller
|
name.addSource(makeOriginalSource(state));
|
401 |
fb3dec85
|
Andreas Müller
|
|
402 |
5cdaf78e
|
Andreas Müller
|
String secondPart = missapliedMatcher.group(3);
|
403 |
fb3dec85
|
Andreas Müller
|
Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
|
404 |
b9cdcc88
|
Andreas Müller
|
misappliedNameTaxon.addSource(makeOriginalSource(state));
|
405 |
5cdaf78e
|
Andreas Müller
|
misappliedNameTaxon.setDoubtful(doubtful);
|
406 |
fb3dec85
|
Andreas Müller
|
if (secondPart.startsWith("sensu")){
|
407 |
|
|
secondPart = secondPart.substring(5).trim();
|
408 |
|
|
if (secondPart.contains(" ")){
|
409 |
5cdaf78e
|
Andreas Müller
|
logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
|
410 |
fb3dec85
|
Andreas Müller
|
}
|
411 |
8422c0cd
|
Andreas Müller
|
Reference sensu = ReferenceFactory.newGeneric();
|
412 |
fb3dec85
|
Andreas Müller
|
Team team = Team.NewTitledInstance(secondPart, null);
|
413 |
|
|
sensu.setAuthorship(team);
|
414 |
|
|
misappliedNameTaxon.setSec(sensu);
|
415 |
b0b606e6
|
Andreas Müller
|
}else if (secondPart.matches(auctRegExStr)){
|
416 |
fb3dec85
|
Andreas Müller
|
secondPart = secondPart.replace("p. p.", "p.p.");
|
417 |
|
|
misappliedNameTaxon.setAppendedPhrase(secondPart);
|
418 |
|
|
}else{
|
419 |
|
|
logger.warn(line + "Misapplied second part not recognized: " + secondPart);
|
420 |
|
|
}
|
421 |
|
|
//TODO
|
422 |
8422c0cd
|
Andreas Müller
|
Reference relRef = null;
|
423 |
fb3dec85
|
Andreas Müller
|
state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
|
424 |
|
|
}else if (nomInvalMatcher.matches()){
|
425 |
|
|
String firstPart = nomInvalMatcher.group(1);
|
426 |
c9f78619
|
Andreas Müller
|
String afterInval = nomInvalMatcher.group(2);
|
427 |
|
|
if (StringUtils.isNotBlank(afterInval)){
|
428 |
|
|
logger.warn(state.getCurrentLine() + ": After inval to be implemented: " + afterInval);
|
429 |
|
|
}
|
430 |
86536e03
|
Andreas Müller
|
TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
431 |
b9cdcc88
|
Andreas Müller
|
name.addSource(makeOriginalSource(state));
|
432 |
fb3dec85
|
Andreas Müller
|
NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
|
433 |
|
|
name.addStatus(status);
|
434 |
3ef2e1bd
|
Andreas Müller
|
Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
|
435 |
|
|
syn.addSource(makeOriginalSource(state));
|
436 |
c9f78619
|
Andreas Müller
|
}else if (sphalmMatcher.matches()){
|
437 |
|
|
String firstPart = sphalmMatcher.group(1);
|
438 |
|
|
String sphalmPart = synonymStr.replace(firstPart, "").replace("“","").replace("”","").trim();
|
439 |
86536e03
|
Andreas Müller
|
TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
440 |
c9f78619
|
Andreas Müller
|
// NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
|
441 |
|
|
// name.addStatus(status);
|
442 |
b9cdcc88
|
Andreas Müller
|
name.addSource(makeOriginalSource(state));
|
443 |
3ef2e1bd
|
Andreas Müller
|
Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
|
444 |
|
|
syn.setAppendedPhrase(sphalmPart);
|
445 |
|
|
syn.setSec(null);
|
446 |
|
|
syn.addSource(makeOriginalSource(state));
|
447 |
fb3dec85
|
Andreas Müller
|
}else if (acceptedMatcher.matches()){
|
448 |
|
|
String firstPart = acceptedMatcher.group(1);
|
449 |
|
|
String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
|
450 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> list = handleHomotypicGroup(firstPart, state, state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
|
451 |
b9cdcc88
|
Andreas Müller
|
checkFirstSynonym(state, list, isFirstSynonym, synonymStr, false);
|
452 |
fb3dec85
|
Andreas Müller
|
}else if(heterotypicMatcher.matches()){
|
453 |
|
|
String firstPart = heterotypicMatcher.group(1).trim();
|
454 |
|
|
String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
|
455 |
|
|
String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
|
456 |
|
|
boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
|
457 |
c9f78619
|
Andreas Müller
|
firstPart = replaceHomonIlleg(firstPart);
|
458 |
|
|
boolean isHomonym = firstPart.matches(".*" + HOMONYM_MARKER);
|
459 |
86536e03
|
Andreas Müller
|
TaxonName synName = (TaxonName)makeName(state, firstPart);
|
460 |
fb3dec85
|
Andreas Müller
|
if (synName.isProtectedTitleCache()){
|
461 |
b9cdcc88
|
Andreas Müller
|
logger.warn(line + "Heterotypic base synonym could not be parsed correctly: " + firstPart);
|
462 |
fb3dec85
|
Andreas Müller
|
}
|
463 |
|
|
if (isHomonym){
|
464 |
|
|
homonyms.add(synName);
|
465 |
|
|
}
|
466 |
3ef2e1bd
|
Andreas Müller
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
|
467 |
|
|
syn.setDoubtful(isDoubtful);
|
468 |
|
|
syn.addSource(makeOriginalSource(state));
|
469 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> list = handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
|
470 |
b9cdcc88
|
Andreas Müller
|
checkFirstSynonym(state, list, isFirstSynonym, synonymStr, true);
|
471 |
|
|
|
472 |
|
|
}else if (isSpecialHeterotypic(synonymStr)){
|
473 |
86536e03
|
Andreas Müller
|
TaxonName synName = (TaxonName)makeName(state, synonymStr);
|
474 |
b9cdcc88
|
Andreas Müller
|
if (synName.isProtectedTitleCache()){
|
475 |
|
|
logger.warn(line + "Special heterotypic synonym could not be parsed correctly:" + synonymStr);
|
476 |
|
|
}
|
477 |
3ef2e1bd
|
Andreas Müller
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
|
478 |
|
|
syn.addSource(makeOriginalSource(state));
|
479 |
fb3dec85
|
Andreas Müller
|
}else{
|
480 |
|
|
logger.warn(line + "Synonym entry does not match: " + synonymStr);
|
481 |
|
|
}
|
482 |
|
|
}
|
483 |
|
|
|
484 |
b9cdcc88
|
Andreas Müller
|
/**
|
485 |
|
|
* @param state
|
486 |
|
|
* @param list
|
487 |
|
|
* @param isFirstSynonym
|
488 |
|
|
* @param synonymStr
|
489 |
|
|
* @param b
|
490 |
|
|
*/
|
491 |
7d882578
|
Andreas Müller
|
private void checkFirstSynonym(CubaImportState state, List<IBotanicalName> list, boolean isFirstSynonym, String synonymStr, boolean isHeterotypicMatcher) {
|
492 |
b9cdcc88
|
Andreas Müller
|
if (!isFirstSynonym){
|
493 |
|
|
return;
|
494 |
|
|
}
|
495 |
|
|
String line = state.getCurrentLine() + ": ";
|
496 |
7d882578
|
Andreas Müller
|
IBotanicalName currentName = isHeterotypicMatcher? (IBotanicalName)state.getCurrentTaxon().getName(): list.get(0);
|
497 |
b9cdcc88
|
Andreas Müller
|
boolean currentHasBasionym = currentName.getBasionymAuthorship() != null;
|
498 |
7d882578
|
Andreas Müller
|
IBotanicalName firstSynonym = isHeterotypicMatcher ? list.get(0): list.get(1);
|
499 |
b9cdcc88
|
Andreas Müller
|
// if (list.size() <= 1){
|
500 |
|
|
// logger.error(line + "homotypic list size is 1 but shouldn't");
|
501 |
|
|
// return;
|
502 |
|
|
// }
|
503 |
|
|
if (isHeterotypicMatcher && currentHasBasionym){
|
504 |
|
|
logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has basionym author but has no homotypic basionym , but : " + synonymStr);
|
505 |
|
|
}else if (isHeterotypicMatcher){
|
506 |
|
|
//first synonym must not have a basionym author
|
507 |
|
|
if (firstSynonym.getBasionymAuthorship() != null){
|
508 |
|
|
logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has no basionym but first synonym requires basionym : " + synonymStr);
|
509 |
|
|
}
|
510 |
|
|
}else{ //isAcceptedMatcher
|
511 |
|
|
if (currentHasBasionym){
|
512 |
|
|
if (! matchAuthor(currentName.getBasionymAuthorship(), firstSynonym.getCombinationAuthorship())){
|
513 |
|
|
logger.info(line + "Current basionym author and first synonym combination author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
|
514 |
|
|
}
|
515 |
|
|
}else{
|
516 |
|
|
if (! matchAuthor(currentName.getCombinationAuthorship(), firstSynonym.getBasionymAuthorship())){
|
517 |
|
|
logger.info(line + "Current combination author and first synonym basionym author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
|
518 |
|
|
}
|
519 |
|
|
}
|
520 |
|
|
}
|
521 |
|
|
|
522 |
|
|
}
|
523 |
|
|
|
524 |
|
|
|
525 |
|
|
/**
|
526 |
|
|
* @param synonymStr
|
527 |
|
|
* @return
|
528 |
|
|
*/
|
529 |
|
|
private boolean isSpecialHeterotypic(String synonymStr) {
|
530 |
|
|
if (synonymStr == null){
|
531 |
|
|
return false;
|
532 |
|
|
}else if (synonymStr.equals("Rhynchospora prenleloupiana (‘prenteloupiana’) Boeckeler")){
|
533 |
|
|
return true;
|
534 |
|
|
}else if (synonymStr.equals("Psidium longipes var. orbiculare (O.Berg) McVaugh")){
|
535 |
|
|
return true;
|
536 |
|
|
}
|
537 |
|
|
return false;
|
538 |
|
|
}
|
539 |
|
|
|
540 |
|
|
|
541 |
|
|
/**
|
542 |
|
|
* @param areaKey
|
543 |
|
|
* @param record
|
544 |
|
|
* @param state
|
545 |
|
|
* @param taxon
|
546 |
|
|
*/
|
547 |
|
|
private void makeSingleProvinceDistribution(String areaKey,
|
548 |
|
|
HashMap<String, String> record,
|
549 |
|
|
CubaImportState state) {
|
550 |
|
|
try {
|
551 |
|
|
UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
|
552 |
|
|
if (areaUuid == null){
|
553 |
|
|
logger.warn("Area not recognized: " + areaKey);
|
554 |
|
|
return;
|
555 |
|
|
}
|
556 |
|
|
if (record.get(areaKey)==null){
|
557 |
|
|
return; //no status defined
|
558 |
|
|
}
|
559 |
|
|
|
560 |
|
|
NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
|
561 |
|
|
if (area == null){
|
562 |
|
|
logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
|
563 |
|
|
}
|
564 |
|
|
TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
|
565 |
|
|
PresenceAbsenceTerm status = makeProvinceStatus(areaKey, record, state);
|
566 |
|
|
if (status == null){
|
567 |
|
|
logger.warn(state.getCurrentLine() + ": Province distribution status could not be defined: " + record.get(areaKey));
|
568 |
|
|
}
|
569 |
|
|
Distribution distribution = Distribution.NewInstance(area, status);
|
570 |
|
|
desc.addElement(distribution);
|
571 |
|
|
distribution.addSource(makeDescriptionSource(state));
|
572 |
|
|
} catch (UndefinedTransformerMethodException e) {
|
573 |
|
|
e.printStackTrace();
|
574 |
|
|
}
|
575 |
|
|
|
576 |
|
|
}
|
577 |
fb3dec85
|
Andreas Müller
|
|
578 |
|
|
|
579 |
|
|
/**
|
580 |
|
|
* @param synonymStr
|
581 |
|
|
* @param state
|
582 |
|
|
* @param homonyms
|
583 |
|
|
* @param homonymPart
|
584 |
|
|
* @param isDoubtful
|
585 |
|
|
* @param taxon
|
586 |
|
|
* @param homotypicalGroup
|
587 |
|
|
*/
|
588 |
7d882578
|
Andreas Müller
|
private List<IBotanicalName> handleHomotypicGroup(String homotypicStrOrig,
|
589 |
fb3dec85
|
Andreas Müller
|
CubaImportState state,
|
590 |
7d882578
|
Andreas Müller
|
IBotanicalName homotypicName,
|
591 |
fb3dec85
|
Andreas Müller
|
boolean isHeterotypic,
|
592 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> homonyms,
|
593 |
fb3dec85
|
Andreas Müller
|
String homonymPart,
|
594 |
|
|
boolean isDoubtful) {
|
595 |
|
|
|
596 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> homotypicNameList = new ArrayList<>();
|
597 |
b9cdcc88
|
Andreas Müller
|
homotypicNameList.add(homotypicName);
|
598 |
|
|
|
599 |
|
|
String homotypicStr = homotypicStrOrig;
|
600 |
fb3dec85
|
Andreas Müller
|
if (homotypicStr == null){
|
601 |
b9cdcc88
|
Andreas Müller
|
return homotypicNameList;
|
602 |
fb3dec85
|
Andreas Müller
|
}else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
|
603 |
|
|
homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
|
604 |
|
|
}
|
605 |
|
|
|
606 |
b9cdcc88
|
Andreas Müller
|
HomotypicalGroup homotypicGroup = homotypicName.getHomotypicalGroup();
|
607 |
fb3dec85
|
Andreas Müller
|
String[] splits = homotypicStr.split("\\s*,\\s*");
|
608 |
|
|
for (String split : splits){
|
609 |
c9f78619
|
Andreas Müller
|
split = replaceHomonIlleg(split);
|
610 |
|
|
boolean isHomonym = split.matches(".*" + HOMONYM_MARKER);
|
611 |
ea7deae0
|
Andreas Müller
|
TaxonName newName = (TaxonName)makeName(state, split);
|
612 |
b9cdcc88
|
Andreas Müller
|
newName.setHomotypicalGroup(homotypicGroup); //not really necessary as this is later set anyway
|
613 |
fb3dec85
|
Andreas Müller
|
if (newName.isProtectedTitleCache()){
|
614 |
|
|
logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
|
615 |
|
|
}
|
616 |
|
|
if (isHomonym){
|
617 |
|
|
homonyms.add(newName);
|
618 |
|
|
}
|
619 |
|
|
if (isHeterotypic){
|
620 |
3ef2e1bd
|
Andreas Müller
|
Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(newName, null, null, homotypicGroup);
|
621 |
|
|
syn.setDoubtful(isDoubtful);
|
622 |
|
|
syn.addSource(makeOriginalSource(state));
|
623 |
fb3dec85
|
Andreas Müller
|
// newName.addBasionym(homotypicName);
|
624 |
|
|
}else{
|
625 |
3ef2e1bd
|
Andreas Müller
|
state.getCurrentTaxon().addHomotypicSynonymName(newName);
|
626 |
fb3dec85
|
Andreas Müller
|
}
|
627 |
b9cdcc88
|
Andreas Müller
|
handleBasionym(state, homotypicNameList, homonyms, newName);
|
628 |
|
|
homotypicNameList.add(newName);
|
629 |
fb3dec85
|
Andreas Müller
|
}
|
630 |
b9cdcc88
|
Andreas Müller
|
makeHomonyms(homonyms, homonymPart, state, homotypicGroup);
|
631 |
|
|
return homotypicNameList;
|
632 |
c9f78619
|
Andreas Müller
|
}
|
633 |
|
|
|
634 |
|
|
|
635 |
|
|
/**
|
636 |
|
|
* @param split
|
637 |
|
|
* @return
|
638 |
|
|
*/
|
639 |
|
|
private String replaceHomonIlleg(String split) {
|
640 |
|
|
String result = split.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
|
641 |
|
|
return result;
|
642 |
fb3dec85
|
Andreas Müller
|
}
|
643 |
|
|
|
644 |
|
|
|
645 |
|
|
/**
|
646 |
|
|
* @param homonyms
|
647 |
|
|
* @param homonymPart
|
648 |
|
|
* @param state
|
649 |
c9f78619
|
Andreas Müller
|
* @param currentBasionym
|
650 |
fb3dec85
|
Andreas Müller
|
*/
|
651 |
7d882578
|
Andreas Müller
|
private void makeHomonyms(List<IBotanicalName> homonyms, String homonymPartOrig, CubaImportState state,
|
652 |
b9cdcc88
|
Andreas Müller
|
HomotypicalGroup homotypicGroup) {
|
653 |
fb3dec85
|
Andreas Müller
|
String line = state.getCurrentLine() + ": ";
|
654 |
c9f78619
|
Andreas Müller
|
String homonymPart = homonymPartOrig == null ? "" : homonymPartOrig.trim();
|
655 |
fb3dec85
|
Andreas Müller
|
if (homonyms.isEmpty() && homonymPart.equals("")){
|
656 |
|
|
return;
|
657 |
|
|
}else if (homonymPart.equals("")){
|
658 |
|
|
logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
|
659 |
|
|
return;
|
660 |
|
|
}
|
661 |
|
|
homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
|
662 |
|
|
String[] splits = homonymPart.split("\\]\\s*\\[");
|
663 |
|
|
if (splits.length != homonyms.size()){
|
664 |
c9f78619
|
Andreas Müller
|
if(homonyms.size() == 0 && splits.length >= 1){
|
665 |
b9cdcc88
|
Andreas Müller
|
handleSimpleBlockingNames(splits, state, homotypicGroup);
|
666 |
c9f78619
|
Andreas Müller
|
}else{
|
667 |
|
|
logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
|
668 |
|
|
}
|
669 |
fb3dec85
|
Andreas Müller
|
return;
|
670 |
|
|
}
|
671 |
|
|
int i = 0;
|
672 |
|
|
for (String split : splits){
|
673 |
|
|
split = split.replaceAll("^non\\s+", "");
|
674 |
86536e03
|
Andreas Müller
|
TaxonName newName = (TaxonName)makeName(state, split);
|
675 |
c9f78619
|
Andreas Müller
|
// BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
|
676 |
fb3dec85
|
Andreas Müller
|
if (newName.isProtectedTitleCache()){
|
677 |
|
|
logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
|
678 |
|
|
}
|
679 |
b9cdcc88
|
Andreas Müller
|
homonyms.get(i).addRelationshipToName(newName, NameRelationshipType.LATER_HOMONYM(), null);
|
680 |
fb3dec85
|
Andreas Müller
|
i++;
|
681 |
|
|
}
|
682 |
|
|
}
|
683 |
|
|
|
684 |
c9f78619
|
Andreas Müller
|
/**
|
685 |
|
|
* @param homonymPart
|
686 |
|
|
* @param state
|
687 |
b9cdcc88
|
Andreas Müller
|
* @param homotypicGroup
|
688 |
c9f78619
|
Andreas Müller
|
*/
|
689 |
b9cdcc88
|
Andreas Müller
|
private void handleSimpleBlockingNames(String[] splitsi,
|
690 |
|
|
CubaImportState state,
|
691 |
|
|
HomotypicalGroup homotypicGroup) {
|
692 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> replacementNameCandidates = new ArrayList<>();
|
693 |
c9f78619
|
Andreas Müller
|
for (String spliti : splitsi){
|
694 |
|
|
|
695 |
|
|
String split = spliti.replaceAll("^non\\s+", "");
|
696 |
86536e03
|
Andreas Müller
|
IBotanicalName newName = makeName(state, split);
|
697 |
c9f78619
|
Andreas Müller
|
if (newName.isProtectedTitleCache()){
|
698 |
|
|
logger.warn(state.getCurrentLine() + ": blocking name could not be parsed: " + split);
|
699 |
|
|
}
|
700 |
7d882578
|
Andreas Müller
|
Set<IBotanicalName> typifiedNames = (Set)homotypicGroup.getTypifiedNames();
|
701 |
|
|
Set<IBotanicalName> candidates = new HashSet<>();
|
702 |
|
|
for (IBotanicalName name : typifiedNames){
|
703 |
c9f78619
|
Andreas Müller
|
if (name.getGenusOrUninomial() != null && name.getGenusOrUninomial().equals(newName.getGenusOrUninomial())){
|
704 |
|
|
if (name.getStatus().isEmpty() || ! name.getStatus().iterator().next().getType().equals(NomenclaturalStatusType.ILLEGITIMATE())){
|
705 |
|
|
candidates.add(name);
|
706 |
|
|
}
|
707 |
|
|
}
|
708 |
|
|
}
|
709 |
|
|
if (candidates.size() == 1){
|
710 |
ea7deae0
|
Andreas Müller
|
TaxonName blockedName = (TaxonName)candidates.iterator().next();
|
711 |
b9cdcc88
|
Andreas Müller
|
newName.addRelationshipToName(blockedName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
|
712 |
|
|
replacementNameCandidates.add(blockedName);
|
713 |
c9f78619
|
Andreas Müller
|
}else{
|
714 |
|
|
logger.warn(state.getCurrentLine() + ": Blocking name could not be handled. " + candidates.size() + " candidates.");
|
715 |
|
|
}
|
716 |
|
|
}
|
717 |
b9cdcc88
|
Andreas Müller
|
makeReplacedSynonymIfPossible(state, homotypicGroup, replacementNameCandidates);
|
718 |
|
|
}
|
719 |
|
|
|
720 |
|
|
/**
|
721 |
|
|
* @param homotypicGroup
|
722 |
|
|
* @param replacementNameCandidates
|
723 |
|
|
*/
|
724 |
|
|
private void makeReplacedSynonymIfPossible(CubaImportState state,
|
725 |
|
|
HomotypicalGroup homotypicGroup,
|
726 |
7d882578
|
Andreas Müller
|
List<IBotanicalName> replacementNameCandidates) {
|
727 |
b9cdcc88
|
Andreas Müller
|
String line = state.getCurrentLine() +": ";
|
728 |
86536e03
|
Andreas Müller
|
List<IBotanicalName> replacedCandidates = new ArrayList<>();
|
729 |
ea7deae0
|
Andreas Müller
|
for (TaxonName typifiedName : homotypicGroup.getTypifiedNames()){
|
730 |
86536e03
|
Andreas Müller
|
IBotanicalName candidate = typifiedName;
|
731 |
b9cdcc88
|
Andreas Müller
|
if (candidate.getBasionymAuthorship() == null){
|
732 |
|
|
if (candidate.getStatus().isEmpty()){
|
733 |
|
|
if (! replacementNameCandidates.contains(candidate)){
|
734 |
|
|
replacedCandidates.add(candidate);
|
735 |
|
|
}
|
736 |
|
|
}
|
737 |
|
|
}
|
738 |
|
|
}
|
739 |
|
|
if (replacedCandidates.size() == 1){
|
740 |
ea7deae0
|
Andreas Müller
|
TaxonName replacedSynonym = (TaxonName)replacedCandidates.iterator().next();
|
741 |
7d882578
|
Andreas Müller
|
for (IBotanicalName replacementName : replacementNameCandidates){
|
742 |
b9cdcc88
|
Andreas Müller
|
replacementName.addReplacedSynonym(replacedSynonym, null, null, null);
|
743 |
|
|
}
|
744 |
|
|
}else if (replacedCandidates.size() < 1){
|
745 |
|
|
logger.warn(line + "No replaced synonym candidate found");
|
746 |
|
|
}else{
|
747 |
|
|
logger.warn(line + "More than 1 ("+replacedCandidates.size()+") replaced synonym candidates found");
|
748 |
|
|
}
|
749 |
c9f78619
|
Andreas Müller
|
}
|
750 |
|
|
|
751 |
|
|
|
752 |
fb3dec85
|
Andreas Müller
|
/**
|
753 |
b9cdcc88
|
Andreas Müller
|
* @param homotypicGroup
|
754 |
fb3dec85
|
Andreas Müller
|
* @param newName
|
755 |
b9cdcc88
|
Andreas Müller
|
*/
|
756 |
7d882578
|
Andreas Müller
|
private void handleBasionym(CubaImportState state, List<IBotanicalName> homotypicNameList,
|
757 |
|
|
List<IBotanicalName> homonyms, IBotanicalName newName) {
|
758 |
|
|
for (IBotanicalName existingName : homotypicNameList){
|
759 |
b9cdcc88
|
Andreas Müller
|
if (existingName != newName){ //should not happen anymore, as new name is added later
|
760 |
|
|
boolean onlyIfNotYetExists = true;
|
761 |
|
|
createBasionymRelationIfPossible(state, existingName, newName, homonyms.contains(newName), onlyIfNotYetExists);
|
762 |
|
|
}
|
763 |
|
|
}
|
764 |
|
|
}
|
765 |
|
|
|
766 |
|
|
/**
|
767 |
|
|
* @param state
|
768 |
|
|
* @param name1
|
769 |
|
|
* @param name2
|
770 |
fb3dec85
|
Andreas Müller
|
* @return
|
771 |
|
|
*/
|
772 |
7d882578
|
Andreas Müller
|
private void createBasionymRelationIfPossible(CubaImportState state, IBotanicalName name1,
|
773 |
|
|
IBotanicalName name2,
|
774 |
b9cdcc88
|
Andreas Müller
|
boolean name2isHomonym, boolean onlyIfNotYetExists) {
|
775 |
ea7deae0
|
Andreas Müller
|
TaxonName basionymName = TaxonName.castAndDeproxy(name1);
|
776 |
|
|
TaxonName newCombination = TaxonName.castAndDeproxy(name2);
|
777 |
b9cdcc88
|
Andreas Müller
|
//exactly one name must have a basionym author
|
778 |
|
|
if (name1.getBasionymAuthorship() == null && name2.getBasionymAuthorship() == null
|
779 |
|
|
|| name1.getBasionymAuthorship() != null && name2.getBasionymAuthorship() != null){
|
780 |
|
|
return;
|
781 |
|
|
}
|
782 |
|
|
|
783 |
|
|
//switch order if necessary
|
784 |
|
|
if (! name2isHomonym && basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
|
785 |
86536e03
|
Andreas Müller
|
basionymName = TaxonName.castAndDeproxy(name2);
|
786 |
|
|
newCombination = TaxonName.castAndDeproxy(name1);
|
787 |
b9cdcc88
|
Andreas Müller
|
}
|
788 |
|
|
if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())
|
789 |
6af76d56
|
Andreas Müller
|
&& BasionymRelationCreator.matchLastNamePart(basionymName, newCombination)){
|
790 |
b9cdcc88
|
Andreas Müller
|
newCombination.addBasionym(basionymName);
|
791 |
|
|
}else{
|
792 |
|
|
if ( (newCombination.getBasionyms().isEmpty() || ! onlyIfNotYetExists)
|
793 |
|
|
&& isLegitimate(basionymName)
|
794 |
|
|
&& ! name2isHomonym){
|
795 |
|
|
logger.info(state.getCurrentLine() + ": Names are potential basionyms but either author or name part do not match: " + basionymName.getTitleCache() + " <-> " + newCombination.getTitleCache());
|
796 |
|
|
}
|
797 |
fb3dec85
|
Andreas Müller
|
}
|
798 |
b9cdcc88
|
Andreas Müller
|
}
|
799 |
|
|
|
800 |
|
|
/**
|
801 |
|
|
* @param basionymName
|
802 |
|
|
* @return
|
803 |
|
|
*/
|
804 |
7d882578
|
Andreas Müller
|
private boolean isLegitimate(IBotanicalName basionymName) {
|
805 |
b9cdcc88
|
Andreas Müller
|
for (NomenclaturalStatus nomStatus : basionymName.getStatus()){
|
806 |
|
|
if (nomStatus.getType()!= null && nomStatus.getType().isIllegitimateType()){
|
807 |
|
|
return false;
|
808 |
|
|
}
|
809 |
|
|
}
|
810 |
|
|
for (NameRelationship nameRel : basionymName.getNameRelations()){
|
811 |
|
|
if (nameRel.getType()!= null && nameRel.getType().isIllegitimateType()){
|
812 |
|
|
return false;
|
813 |
|
|
}
|
814 |
fb3dec85
|
Andreas Müller
|
}
|
815 |
b9cdcc88
|
Andreas Müller
|
return true;
|
816 |
|
|
}
|
817 |
|
|
|
818 |
|
|
|
819 |
fb3dec85
|
Andreas Müller
|
/**
|
820 |
|
|
* @param combinationAuthorship
|
821 |
|
|
* @param basi
|
822 |
|
|
* @return
|
823 |
|
|
*/
|
824 |
|
|
private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
|
825 |
|
|
if (author1 == null || author2 == null){
|
826 |
|
|
return false;
|
827 |
|
|
}else {
|
828 |
|
|
return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
|
829 |
|
|
}
|
830 |
ede5c502
|
Andreas Müller
|
}
|
831 |
|
|
|
832 |
|
|
|
833 |
|
|
/**
|
834 |
|
|
* @param record
|
835 |
|
|
* @param state
|
836 |
|
|
* @param taxon
|
837 |
|
|
*/
|
838 |
fb3dec85
|
Andreas Müller
|
private void makeNotes(HashMap<String, String> record, CubaImportState state) {
|
839 |
ede5c502
|
Andreas Müller
|
String notesStr = getValue(record, "(Notas)");
|
840 |
|
|
if (notesStr == null){
|
841 |
|
|
return;
|
842 |
|
|
}else{
|
843 |
|
|
Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
|
844 |
|
|
//TODO
|
845 |
0a6a64c9
|
Andreas Müller
|
annotation.setAnnotationType(AnnotationType.TECHNICAL());
|
846 |
fb3dec85
|
Andreas Müller
|
state.getCurrentTaxon().addAnnotation(annotation);
|
847 |
ede5c502
|
Andreas Müller
|
}
|
848 |
|
|
}
|
849 |
|
|
|
850 |
|
|
|
851 |
|
|
/**
|
852 |
|
|
* @param record
|
853 |
|
|
* @param state
|
854 |
|
|
* @param familyTaxon
|
855 |
|
|
* @return
|
856 |
|
|
*/
|
857 |
fb3dec85
|
Andreas Müller
|
private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
|
858 |
4b9c9c4b
|
Andreas Müller
|
String taxonStrOrig = getValue(record, "Taxón");
|
859 |
|
|
if (taxonStrOrig == null){
|
860 |
fb3dec85
|
Andreas Müller
|
return isSynonym ? state.getCurrentTaxon() : null;
|
861 |
ede5c502
|
Andreas Müller
|
}
|
862 |
b9cdcc88
|
Andreas Müller
|
|
863 |
ede5c502
|
Andreas Müller
|
boolean isAbsent = false;
|
864 |
4b9c9c4b
|
Andreas Müller
|
String taxonStr = taxonStrOrig;
|
865 |
|
|
if (taxonStrOrig.startsWith("[") && taxonStrOrig.endsWith("]")){
|
866 |
ede5c502
|
Andreas Müller
|
taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
|
867 |
|
|
isAbsent = true;
|
868 |
|
|
}
|
869 |
|
|
|
870 |
b9cdcc88
|
Andreas Müller
|
boolean isAuct = false;
|
871 |
|
|
if (taxonStr.endsWith("auct.")){
|
872 |
|
|
isAuct = true;
|
873 |
|
|
taxonStr.replace("auct.", "").trim();
|
874 |
|
|
}
|
875 |
|
|
state.setTaxonIsAbsent(isAbsent);
|
876 |
86536e03
|
Andreas Müller
|
IBotanicalName botanicalName = makeName(state, taxonStr);
|
877 |
8422c0cd
|
Andreas Müller
|
Reference sec = getSecReference(state);
|
878 |
fb3dec85
|
Andreas Müller
|
Taxon taxon = Taxon.NewInstance(botanicalName, sec);
|
879 |
b9cdcc88
|
Andreas Müller
|
if (isAuct){
|
880 |
|
|
taxon.setAppendedPhrase("auct.");
|
881 |
|
|
}
|
882 |
|
|
|
883 |
fb3dec85
|
Andreas Müller
|
TaxonNode higherNode;
|
884 |
ede5c502
|
Andreas Müller
|
if (botanicalName.isProtectedTitleCache()){
|
885 |
4b9c9c4b
|
Andreas Müller
|
logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStrOrig);
|
886 |
fb3dec85
|
Andreas Müller
|
higherNode = familyNode;
|
887 |
|
|
}else{
|
888 |
|
|
String genusStr = botanicalName.getGenusOrUninomial();
|
889 |
|
|
Taxon genus = state.getHigherTaxon(genusStr);
|
890 |
|
|
if (genus != null){
|
891 |
|
|
higherNode = genus.getTaxonNodes().iterator().next();
|
892 |
|
|
}else{
|
893 |
86536e03
|
Andreas Müller
|
IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
|
894 |
b9cdcc88
|
Andreas Müller
|
name.addSource(makeOriginalSource(state));
|
895 |
fb3dec85
|
Andreas Müller
|
name.setGenusOrUninomial(genusStr);
|
896 |
|
|
genus = Taxon.NewInstance(name, sec);
|
897 |
b9cdcc88
|
Andreas Müller
|
genus.addSource(makeOriginalSource(state));
|
898 |
fb3dec85
|
Andreas Müller
|
higherNode = familyNode.addChildTaxon(genus, null, null);
|
899 |
|
|
state.putHigherTaxon(genusStr, genus);
|
900 |
|
|
}
|
901 |
ede5c502
|
Andreas Müller
|
}
|
902 |
f4682883
|
Andreas Müller
|
taxon.addSource(makeOriginalSource(state));
|
903 |
|
|
|
904 |
|
|
TaxonNode newNode = higherNode.addChildTaxon(taxon, null, null);
|
905 |
4b9c9c4b
|
Andreas Müller
|
if(isAbsent){
|
906 |
|
|
botanicalName.setTitleCache(taxonStrOrig, true);
|
907 |
f4682883
|
Andreas Müller
|
newNode.setExcluded(true);
|
908 |
4b9c9c4b
|
Andreas Müller
|
}
|
909 |
fb3dec85
|
Andreas Müller
|
|
910 |
ede5c502
|
Andreas Müller
|
return taxon;
|
911 |
|
|
}
|
912 |
|
|
|
913 |
c9f78619
|
Andreas Müller
|
private final String orthVarRegExStr = "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
|
914 |
|
|
private final Pattern orthVarRegEx = Pattern.compile(orthVarRegExStr);
|
915 |
|
|
/**
|
916 |
|
|
* @param taxonStr
|
917 |
|
|
* @return
|
918 |
|
|
*/
|
919 |
86536e03
|
Andreas Müller
|
private IBotanicalName makeName(CubaImportState state, String nameStrOrig) {
|
920 |
c9f78619
|
Andreas Müller
|
//normalize
|
921 |
|
|
String nameStr = normalizeStatus(nameStrOrig);
|
922 |
|
|
//orthVar
|
923 |
|
|
Matcher orthVarMatcher = orthVarRegEx.matcher(nameStr);
|
924 |
|
|
String orthVar = null;
|
925 |
|
|
if (orthVarMatcher.matches()) {
|
926 |
|
|
orthVar = orthVarMatcher.group(1);
|
927 |
|
|
nameStr = nameStr.replace(" " + orthVar, "").trim().replaceAll("\\s{2,}", " ");
|
928 |
|
|
orthVar = orthVar.substring(2, orthVar.length() - 2);
|
929 |
b9cdcc88
|
Andreas Müller
|
}
|
930 |
c9f78619
|
Andreas Müller
|
|
931 |
b9cdcc88
|
Andreas Müller
|
boolean isNomInval = false;
|
932 |
|
|
if (nameStr.endsWith("nom. inval.")){
|
933 |
|
|
isNomInval = true;
|
934 |
|
|
nameStr = nameStr.replace("nom. inval.", "").trim();
|
935 |
c9f78619
|
Andreas Müller
|
}
|
936 |
b9cdcc88
|
Andreas Müller
|
|
937 |
86536e03
|
Andreas Müller
|
TaxonName result = (TaxonName)nameParser.parseReferencedName(nameStr, nc, Rank.SPECIES());
|
938 |
b9cdcc88
|
Andreas Müller
|
result.addSource(makeOriginalSource(state));
|
939 |
|
|
if (isNomInval){
|
940 |
|
|
result.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.INVALID()));
|
941 |
|
|
}
|
942 |
c9f78619
|
Andreas Müller
|
if (orthVar != null){
|
943 |
86536e03
|
Andreas Müller
|
TaxonName orthVarName = (TaxonName)result.clone();
|
944 |
b9cdcc88
|
Andreas Müller
|
orthVarName.addSource(makeOriginalSource(state));
|
945 |
c9f78619
|
Andreas Müller
|
//TODO
|
946 |
8422c0cd
|
Andreas Müller
|
Reference citation = null;
|
947 |
c9f78619
|
Andreas Müller
|
orthVarName.addRelationshipToName(result, NameRelationshipType.ORTHOGRAPHIC_VARIANT(), citation, null, null);
|
948 |
|
|
orthVarName.setSpecificEpithet(orthVar);
|
949 |
|
|
}
|
950 |
b9cdcc88
|
Andreas Müller
|
normalizeAuthors(result);
|
951 |
c9f78619
|
Andreas Müller
|
return result;
|
952 |
|
|
|
953 |
|
|
}
|
954 |
|
|
|
955 |
b9cdcc88
|
Andreas Müller
|
/**
|
956 |
|
|
* @param result
|
957 |
|
|
*/
|
958 |
7d882578
|
Andreas Müller
|
private void normalizeAuthors(IBotanicalName result) {
|
959 |
b9cdcc88
|
Andreas Müller
|
result.setCombinationAuthorship(normalizeAuthor(result.getCombinationAuthorship()));
|
960 |
|
|
result.setExCombinationAuthorship(normalizeAuthor(result.getExCombinationAuthorship()));
|
961 |
|
|
result.setExBasionymAuthorship(normalizeAuthor(result.getExBasionymAuthorship()));
|
962 |
|
|
result.setBasionymAuthorship(normalizeAuthor(result.getBasionymAuthorship()));
|
963 |
|
|
|
964 |
|
|
}
|
965 |
|
|
|
966 |
|
|
|
967 |
|
|
/**
|
968 |
|
|
* @param combinationAuthorship
|
969 |
|
|
* @return
|
970 |
|
|
*/
|
971 |
|
|
private TeamOrPersonBase<?> normalizeAuthor(TeamOrPersonBase<?> author) {
|
972 |
|
|
if (author == null){
|
973 |
|
|
return null;
|
974 |
|
|
}
|
975 |
|
|
TeamOrPersonBase<?> result;
|
976 |
|
|
if (author.isInstanceOf(Person.class)){
|
977 |
|
|
result = normalizePerson(CdmBase.deproxy(author, Person.class));
|
978 |
|
|
}else{
|
979 |
|
|
Team team = CdmBase.deproxy(author, Team.class);
|
980 |
|
|
List<Person> list = team.getTeamMembers();
|
981 |
|
|
for(int i = 0; i < list.size(); i++){
|
982 |
|
|
Person person = list.get(i);
|
983 |
|
|
Person tmpMember = normalizePerson(person);
|
984 |
|
|
list.set(i, tmpMember);
|
985 |
|
|
}
|
986 |
|
|
return team;
|
987 |
|
|
}
|
988 |
|
|
return result;
|
989 |
|
|
}
|
990 |
|
|
|
991 |
|
|
|
992 |
|
|
/**
|
993 |
|
|
* @param deproxy
|
994 |
|
|
* @return
|
995 |
|
|
*/
|
996 |
|
|
private Person normalizePerson(Person person) {
|
997 |
|
|
String title = person.getNomenclaturalTitle();
|
998 |
|
|
title = title.replaceAll("(?<=[a-zA-Z])\\.(?=[a-zA-Z])", ". ");
|
999 |
|
|
person.setNomenclaturalTitle(title);
|
1000 |
|
|
boolean isFilius = title.endsWith(" f.");
|
1001 |
|
|
if (isFilius){
|
1002 |
|
|
title.replace(" f.", "");
|
1003 |
|
|
}
|
1004 |
|
|
|
1005 |
|
|
String[] splits = title.split("\\s+");
|
1006 |
|
|
int nNotFirstName = isFilius ? 2 : 1;
|
1007 |
|
|
person.setLastname(splits[splits.length - nNotFirstName] + (isFilius? " f." : ""));
|
1008 |
|
|
person.setFirstname(CdmUtils.concat(" ", Arrays.copyOfRange(splits, 0, splits.length-nNotFirstName)));
|
1009 |
|
|
return person;
|
1010 |
|
|
}
|
1011 |
|
|
|
1012 |
|
|
|
1013 |
fb3dec85
|
Andreas Müller
|
/**
|
1014 |
|
|
* @param state
|
1015 |
|
|
* @return
|
1016 |
|
|
*/
|
1017 |
8422c0cd
|
Andreas Müller
|
private Reference getSecReference(CubaImportState state) {
|
1018 |
|
|
Reference result = state.getSecReference();
|
1019 |
fb3dec85
|
Andreas Müller
|
if (result == null){
|
1020 |
|
|
result = ReferenceFactory.newDatabase();
|
1021 |
|
|
result.setTitle("Flora of Cuba");
|
1022 |
|
|
state.setSecReference(result);
|
1023 |
|
|
}
|
1024 |
|
|
return result;
|
1025 |
|
|
}
|
1026 |
|
|
|
1027 |
|
|
|
1028 |
|
|
private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
|
1029 |
c9f78619
|
Andreas Müller
|
"nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
|
1030 |
fb3dec85
|
Andreas Müller
|
/**
|
1031 |
|
|
* @param taxonStr
|
1032 |
|
|
* @return
|
1033 |
|
|
*/
|
1034 |
c9f78619
|
Andreas Müller
|
private String normalizeStatus(String nameStr) {
|
1035 |
|
|
if (nameStr == null){
|
1036 |
fb3dec85
|
Andreas Müller
|
return null;
|
1037 |
|
|
}
|
1038 |
c9f78619
|
Andreas Müller
|
String result = nameStr.replaceAll(HOMONYM_MARKER, "").trim();
|
1039 |
fb3dec85
|
Andreas Müller
|
for (String nomStatusStr : nomStatusStrings){
|
1040 |
|
|
nomStatusStr = " " + nomStatusStr;
|
1041 |
c9f78619
|
Andreas Müller
|
if (result.endsWith(nomStatusStr)){
|
1042 |
|
|
result = result.replace(nomStatusStr, "," + nomStatusStr);
|
1043 |
fb3dec85
|
Andreas Müller
|
}
|
1044 |
|
|
}
|
1045 |
c9f78619
|
Andreas Müller
|
result = result.replaceAll(DOUBTFUL_MARKER, "").trim();
|
1046 |
|
|
result = result.replace("[taxon]", "[infraspec.]");
|
1047 |
|
|
return result;
|
1048 |
fb3dec85
|
Andreas Müller
|
|
1049 |
|
|
|
1050 |
|
|
}
|
1051 |
|
|
|
1052 |
ede5c502
|
Andreas Müller
|
|
1053 |
|
|
/**
|
1054 |
|
|
* @param record
|
1055 |
|
|
* @param state
|
1056 |
|
|
* @return
|
1057 |
|
|
*/
|
1058 |
|
|
private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
|
1059 |
c9f78619
|
Andreas Müller
|
String familyStr = getValue(record, "Fam. default");
|
1060 |
ede5c502
|
Andreas Müller
|
if (familyStr == null){
|
1061 |
|
|
return null;
|
1062 |
|
|
}
|
1063 |
b9cdcc88
|
Andreas Müller
|
familyStr = familyStr.trim();
|
1064 |
|
|
String alternativeFamilyStr = null;
|
1065 |
|
|
if (familyStr.contains("/")){
|
1066 |
|
|
String[] splits = familyStr.split("/");
|
1067 |
|
|
if (splits.length > 2){
|
1068 |
|
|
logger.warn(state.getCurrentLine() +": " + "More than 1 alternative name:" + familyStr);
|
1069 |
|
|
}
|
1070 |
|
|
familyStr = splits[0].trim();
|
1071 |
|
|
alternativeFamilyStr = splits[1].trim();
|
1072 |
|
|
}
|
1073 |
|
|
|
1074 |
ede5c502
|
Andreas Müller
|
Taxon family = state.getHigherTaxon(familyStr);
|
1075 |
|
|
TaxonNode familyNode;
|
1076 |
|
|
if (family != null){
|
1077 |
|
|
familyNode = family.getTaxonNodes().iterator().next();
|
1078 |
|
|
}else{
|
1079 |
86536e03
|
Andreas Müller
|
TaxonName name = (TaxonName)makeFamilyName(state, familyStr);
|
1080 |
8422c0cd
|
Andreas Müller
|
Reference sec = getSecReference(state);
|
1081 |
b9cdcc88
|
Andreas Müller
|
family = Taxon.NewInstance(name, sec);
|
1082 |
fb3dec85
|
Andreas Müller
|
ITaxonTreeNode rootNode = getClassification(state);
|
1083 |
b9cdcc88
|
Andreas Müller
|
familyNode = rootNode.addChildTaxon(family, sec, null);
|
1084 |
|
|
state.putHigherTaxon(familyStr, family);
|
1085 |
|
|
|
1086 |
|
|
}
|
1087 |
|
|
|
1088 |
|
|
if (isNotBlank(alternativeFamilyStr)){
|
1089 |
|
|
NameRelationshipType type = NameRelationshipType.ALTERNATIVE_NAME();
|
1090 |
86536e03
|
Andreas Müller
|
TaxonName alternativeName = (TaxonName)makeFamilyName(state, alternativeFamilyStr);
|
1091 |
|
|
IBotanicalName familyName = family.getName();
|
1092 |
b9cdcc88
|
Andreas Müller
|
boolean hasRelation = false;
|
1093 |
|
|
for (NameRelationship nameRel : familyName.getRelationsToThisName()){
|
1094 |
|
|
if (nameRel.getType().equals(type)){
|
1095 |
|
|
if (nameRel.getFromName().equals(alternativeName)){
|
1096 |
|
|
hasRelation = true;
|
1097 |
|
|
}
|
1098 |
|
|
}
|
1099 |
|
|
}
|
1100 |
|
|
if (!hasRelation){
|
1101 |
|
|
familyName.addRelationshipFromName(alternativeName, type, null);
|
1102 |
|
|
}
|
1103 |
|
|
|
1104 |
ede5c502
|
Andreas Müller
|
}
|
1105 |
|
|
|
1106 |
|
|
return familyNode;
|
1107 |
|
|
}
|
1108 |
|
|
|
1109 |
b9cdcc88
|
Andreas Müller
|
|
1110 |
|
|
/**
|
1111 |
|
|
* @param state
|
1112 |
|
|
* @param taxon
|
1113 |
|
|
*/
|
1114 |
|
|
private void validateTaxonIsAbsent(CubaImportState state, Taxon taxon) {
|
1115 |
|
|
if (!state.isTaxonIsAbsent()){
|
1116 |
|
|
return;
|
1117 |
|
|
}
|
1118 |
|
|
|
1119 |
|
|
for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
|
1120 |
|
|
if (el instanceof Distribution){
|
1121 |
|
|
Distribution dist = (Distribution)el;
|
1122 |
|
|
NamedArea area = dist.getArea();
|
1123 |
|
|
if (isCubanArea(area)){
|
1124 |
|
|
PresenceAbsenceTerm status = dist.getStatus();
|
1125 |
|
|
if (status != null && !status.isAbsenceTerm()){
|
1126 |
|
|
if (!isDoubtfulTerm(status)){
|
1127 |
|
|
String name = taxon.getName().getTitleCache();
|
1128 |
|
|
logger.error(state.getCurrentLine() +": Taxon ("+name+")is absent'[]' but has presence distribution: " + status.getTitleCache());
|
1129 |
|
|
return;
|
1130 |
|
|
}
|
1131 |
|
|
}
|
1132 |
|
|
}
|
1133 |
|
|
}
|
1134 |
|
|
}
|
1135 |
|
|
}
|
1136 |
|
|
|
1137 |
|
|
/**
|
1138 |
|
|
* @param state
|
1139 |
|
|
* @param taxon
|
1140 |
|
|
*/
|
1141 |
|
|
private void validateEndemic(CubaImportState state, Taxon taxon) {
|
1142 |
|
|
|
1143 |
|
|
boolean hasExternalPresence = false;
|
1144 |
|
|
for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
|
1145 |
|
|
if (el instanceof Distribution){
|
1146 |
|
|
Distribution dist = (Distribution)el;
|
1147 |
|
|
NamedArea area = dist.getArea();
|
1148 |
|
|
if (!isCubanArea(area)){
|
1149 |
|
|
PresenceAbsenceTerm status = dist.getStatus();
|
1150 |
|
|
if (status != null && !status.isAbsenceTerm()){
|
1151 |
|
|
if (!isDoubtfulTerm(status)){
|
1152 |
|
|
hasExternalPresence = true;
|
1153 |
|
|
if (state.isEndemic()){
|
1154 |
|
|
String name = taxon.getName().getTitleCache();
|
1155 |
|
|
logger.error(state.getCurrentLine() +": Taxon ("+name+")is endemic but has non-cuban distribution: " + area.getIdInVocabulary() + "-" + status.getIdInVocabulary());
|
1156 |
|
|
return;
|
1157 |
|
|
}
|
1158 |
|
|
}
|
1159 |
|
|
}
|
1160 |
|
|
}
|
1161 |
|
|
}
|
1162 |
|
|
}
|
1163 |
|
|
if (!state.isEndemic() && ! hasExternalPresence){
|
1164 |
|
|
String name = taxon.getName().getTitleCache();
|
1165 |
|
|
logger.error(state.getCurrentLine() +": Taxon ("+name+")is not endemic but has no non-cuban distribution" );
|
1166 |
|
|
}
|
1167 |
|
|
}
|
1168 |
|
|
|
1169 |
|
|
|
1170 |
c9f78619
|
Andreas Müller
|
/**
|
1171 |
|
|
* @param state
|
1172 |
|
|
* @param taxon
|
1173 |
|
|
* @param famStr
|
1174 |
|
|
* @param famRef
|
1175 |
|
|
* @return
|
1176 |
|
|
*/
|
1177 |
8422c0cd
|
Andreas Müller
|
private Taxon makeAlternativeFamilyTaxon(CubaImportState state, String famStr, Reference famRef) {
|
1178 |
c9f78619
|
Andreas Müller
|
String key = famRef.getTitle() + ":"+ famStr;
|
1179 |
|
|
Taxon family = state.getHigherTaxon(key);
|
1180 |
|
|
if (family == null){
|
1181 |
86536e03
|
Andreas Müller
|
IBotanicalName name = makeFamilyName(state, famStr);
|
1182 |
c9f78619
|
Andreas Müller
|
family = Taxon.NewInstance(name, famRef);
|
1183 |
|
|
state.putHigherTaxon(key, family);
|
1184 |
|
|
}
|
1185 |
|
|
|
1186 |
|
|
return family;
|
1187 |
|
|
}
|
1188 |
|
|
|
1189 |
ede5c502
|
Andreas Müller
|
|
1190 |
b9cdcc88
|
Andreas Müller
|
/**
|
1191 |
|
|
* @param state
|
1192 |
|
|
* @param famStr
|
1193 |
|
|
* @return
|
1194 |
|
|
*/
|
1195 |
86536e03
|
Andreas Müller
|
private IBotanicalName makeFamilyName(CubaImportState state, String famStr) {
|
1196 |
|
|
IBotanicalName name = state.getFamilyName(famStr);
|
1197 |
b9cdcc88
|
Andreas Müller
|
if (name == null){
|
1198 |
ded3de15
|
Andreas Müller
|
name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
|
1199 |
b9cdcc88
|
Andreas Müller
|
name.setGenusOrUninomial(famStr);
|
1200 |
|
|
state.putFamilyName(famStr, name);
|
1201 |
|
|
name.addSource(makeOriginalSource(state));
|
1202 |
|
|
}
|
1203 |
|
|
return name;
|
1204 |
|
|
}
|
1205 |
|
|
|
1206 |
|
|
|
1207 |
ede5c502
|
Andreas Müller
|
/**
|
1208 |
|
|
* @param state
|
1209 |
|
|
* @return
|
1210 |
|
|
*/
|
1211 |
fb3dec85
|
Andreas Müller
|
private TaxonNode getClassification(CubaImportState state) {
|
1212 |
ede5c502
|
Andreas Müller
|
Classification classification = state.getClassification();
|
1213 |
|
|
if (classification == null){
|
1214 |
fb3dec85
|
Andreas Müller
|
classification = getClassificationService().find(state.getConfig().getClassificationUuid());
|
1215 |
|
|
}
|
1216 |
|
|
TaxonNode rootNode = state.getRootNode();
|
1217 |
|
|
if (rootNode == null){
|
1218 |
|
|
rootNode = getTaxonNodeService().find(plantaeUuid);
|
1219 |
|
|
}
|
1220 |
|
|
if (rootNode == null){
|
1221 |
8422c0cd
|
Andreas Müller
|
Reference sec = getSecReference(state);
|
1222 |
fb3dec85
|
Andreas Müller
|
if (classification == null){
|
1223 |
|
|
String classificationName = state.getConfig().getClassificationName();
|
1224 |
|
|
//TODO
|
1225 |
|
|
Language language = Language.DEFAULT();
|
1226 |
|
|
classification = Classification.NewInstance(classificationName, sec, language);
|
1227 |
|
|
state.setClassification(classification);
|
1228 |
|
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
1229 |
|
|
classification.getRootNode().setUuid(rootUuid);
|
1230 |
|
|
}
|
1231 |
|
|
|
1232 |
86536e03
|
Andreas Müller
|
IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
|
1233 |
fb3dec85
|
Andreas Müller
|
plantaeName.setGenusOrUninomial("Plantae");
|
1234 |
|
|
Taxon plantae = Taxon.NewInstance(plantaeName, sec);
|
1235 |
|
|
TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
|
1236 |
|
|
plantaeNode.setUuid(plantaeUuid);
|
1237 |
|
|
state.setRootNode(plantaeNode);
|
1238 |
ede5c502
|
Andreas Müller
|
getClassificationService().save(classification);
|
1239 |
fb3dec85
|
Andreas Müller
|
|
1240 |
|
|
rootNode = plantaeNode;
|
1241 |
ede5c502
|
Andreas Müller
|
}
|
1242 |
fb3dec85
|
Andreas Müller
|
return rootNode;
|
1243 |
ede5c502
|
Andreas Müller
|
}
|
1244 |
|
|
|
1245 |
|
|
|
1246 |
|
|
/**
|
1247 |
|
|
* @param record
|
1248 |
|
|
* @param originalKey
|
1249 |
|
|
* @return
|
1250 |
|
|
*/
|
1251 |
|
|
private String getValue(HashMap<String, String> record, String originalKey) {
|
1252 |
|
|
String value = record.get(originalKey);
|
1253 |
|
|
if (! StringUtils.isBlank(value)) {
|
1254 |
|
|
if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
|
1255 |
|
|
value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
|
1256 |
|
|
return value;
|
1257 |
|
|
}else{
|
1258 |
|
|
return null;
|
1259 |
|
|
}
|
1260 |
|
|
}
|
1261 |
|
|
|
1262 |
|
|
|
1263 |
|
|
|
1264 |
|
|
/**
|
1265 |
|
|
* Stores taxa records in DB
|
1266 |
|
|
*/
|
1267 |
|
|
@Override
|
1268 |
|
|
protected void firstPass(CubaImportState state) {
|
1269 |
b9cdcc88
|
Andreas Müller
|
boolean isSynonymOnly = false;
|
1270 |
ede5c502
|
Andreas Müller
|
|
1271 |
f5c05984
|
Andreas Müller
|
String line = state.getCurrentLine() + ": ";
|
1272 |
fb3dec85
|
Andreas Müller
|
HashMap<String, String> record = state.getOriginalRecord();
|
1273 |
ede5c502
|
Andreas Müller
|
|
1274 |
fb3dec85
|
Andreas Müller
|
Set<String> keys = record.keySet();
|
1275 |
|
|
for (String key: keys) {
|
1276 |
|
|
if (! expectedKeys.contains(key)){
|
1277 |
f5c05984
|
Andreas Müller
|
logger.warn(line + "Unexpected Key: " + key);
|
1278 |
fb3dec85
|
Andreas Müller
|
}
|
1279 |
|
|
}
|
1280 |
ede5c502
|
Andreas Müller
|
|
1281 |
c9f78619
|
Andreas Müller
|
if (record.get("Fam. default") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
|
1282 |
fb3dec85
|
Andreas Müller
|
//second header line, don't handle
|
1283 |
|
|
return;
|
1284 |
|
|
}
|
1285 |
ede5c502
|
Andreas Müller
|
|
1286 |
fb3dec85
|
Andreas Müller
|
//Fam.
|
1287 |
|
|
TaxonNode familyTaxon = getFamilyTaxon(record, state);
|
1288 |
|
|
if (familyTaxon == null){
|
1289 |
|
|
if (record.get("Taxón") != null){
|
1290 |
f5c05984
|
Andreas Müller
|
logger.warn(line + "Family not recognized but taxon exists: " + record.get("Taxón"));
|
1291 |
fb3dec85
|
Andreas Müller
|
return;
|
1292 |
|
|
}else if (record.get("Syn.") == null){
|
1293 |
f5c05984
|
Andreas Müller
|
logger.warn(line + "Family not recognized but also no synonym exists");
|
1294 |
fb3dec85
|
Andreas Müller
|
return;
|
1295 |
|
|
}else{
|
1296 |
b9cdcc88
|
Andreas Müller
|
isSynonymOnly = true;
|
1297 |
fb3dec85
|
Andreas Müller
|
}
|
1298 |
|
|
}
|
1299 |
ede5c502
|
Andreas Müller
|
|
1300 |
b9cdcc88
|
Andreas Müller
|
//Taxón
|
1301 |
|
|
Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonymOnly);
|
1302 |
|
|
if (taxon == null && ! isSynonymOnly){
|
1303 |
f5c05984
|
Andreas Müller
|
logger.warn(line + "taxon could not be created and is null");
|
1304 |
fb3dec85
|
Andreas Müller
|
return;
|
1305 |
|
|
}
|
1306 |
|
|
state.setCurrentTaxon(taxon);
|
1307 |
ede5c502
|
Andreas Müller
|
|
1308 |
c9f78619
|
Andreas Müller
|
//Fam. ALT
|
1309 |
4b9c9c4b
|
Andreas Müller
|
if (!isSynonymOnly){
|
1310 |
|
|
makeAlternativeFamilies(record, state, familyTaxon, taxon);
|
1311 |
|
|
}
|
1312 |
c9f78619
|
Andreas Müller
|
|
1313 |
fb3dec85
|
Andreas Müller
|
//(Notas)
|
1314 |
|
|
makeNotes(record, state);
|
1315 |
ede5c502
|
Andreas Müller
|
|
1316 |
fb3dec85
|
Andreas Müller
|
//Syn.
|
1317 |
b9cdcc88
|
Andreas Müller
|
makeSynonyms(record, state, !isSynonymOnly);
|
1318 |
ede5c502
|
Andreas Müller
|
|
1319 |
fb3dec85
|
Andreas Müller
|
//End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
|
1320 |
|
|
makeCubanDistribution(record, state);
|
1321 |
ede5c502
|
Andreas Müller
|
|
1322 |
|
|
|
1323 |
0a6a64c9
|
Andreas Müller
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1324 |
fb3dec85
|
Andreas Müller
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
1325 |
|
|
// "CuE","Gr","Ho","SC","Gu",
|
1326 |
|
|
makeProvincesDistribution(record, state);
|
1327 |
ede5c502
|
Andreas Müller
|
|
1328 |
c9f78619
|
Andreas Müller
|
// "Esp","Ja","PR","Men","Bah","Cay",
|
1329 |
|
|
// "AmN","AmC","AmS","VM"});
|
1330 |
|
|
makeOtherAreasDistribution(record, state);
|
1331 |
0a6a64c9
|
Andreas Müller
|
|
1332 |
b9cdcc88
|
Andreas Müller
|
validateTaxonIsAbsent(state, taxon);
|
1333 |
|
|
if (!isSynonymOnly){
|
1334 |
|
|
validateEndemic(state, taxon);
|
1335 |
|
|
}
|
1336 |
0a6a64c9
|
Andreas Müller
|
|
1337 |
|
|
state.setHighestStatusForTaxon(null);
|
1338 |
|
|
|
1339 |
fb3dec85
|
Andreas Müller
|
return;
|
1340 |
|
|
}
|
1341 |
ede5c502
|
Andreas Müller
|
|
1342 |
|
|
|
1343 |
b9cdcc88
|
Andreas Müller
|
/**
|
1344 |
|
|
* @param state
|
1345 |
|
|
* @return
|
1346 |
|
|
*/
|
1347 |
|
|
private IdentifiableSource makeOriginalSource(CubaImportState state) {
|
1348 |
|
|
return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
|
1349 |
|
|
}
|
1350 |
|
|
/**
|
1351 |
|
|
* @param state
|
1352 |
|
|
* @return
|
1353 |
|
|
*/
|
1354 |
|
|
private DescriptionElementSource makeDescriptionSource(CubaImportState state) {
|
1355 |
|
|
return DescriptionElementSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
|
1356 |
|
|
}
|
1357 |
|
|
|
1358 |
|
|
private static Set<UUID> doubtfulStatus = new HashSet<>();
|
1359 |
ede5c502
|
Andreas Müller
|
|
1360 |
b9cdcc88
|
Andreas Müller
|
/**
|
1361 |
|
|
* @param status
|
1362 |
|
|
* @return
|
1363 |
|
|
*/
|
1364 |
|
|
private boolean isDoubtfulTerm(PresenceAbsenceTerm status) {
|
1365 |
|
|
if (doubtfulStatus.isEmpty()){
|
1366 |
|
|
doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyNaturalisedUuid);
|
1367 |
|
|
doubtfulStatus.add(CubaTransformer.doubtfulIndigenousDoubtfulUuid);
|
1368 |
|
|
doubtfulStatus.add(CubaTransformer.endemicDoubtfullyPresentUuid);
|
1369 |
|
|
doubtfulStatus.add(CubaTransformer.naturalisedDoubtfullyPresentUuid);
|
1370 |
|
|
doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyPresentUuid);
|
1371 |
|
|
doubtfulStatus.add(CubaTransformer.occasionallyCultivatedUuid);
|
1372 |
|
|
doubtfulStatus.add(CubaTransformer.rareCasualUuid);
|
1373 |
|
|
doubtfulStatus.add(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE().getUuid());
|
1374 |
|
|
doubtfulStatus.add(PresenceAbsenceTerm.CULTIVATED_PRESENCE_QUESTIONABLE().getUuid());
|
1375 |
|
|
}
|
1376 |
|
|
boolean isDoubtful = doubtfulStatus.contains(status.getUuid());
|
1377 |
|
|
return isDoubtful;
|
1378 |
|
|
}
|
1379 |
|
|
|
1380 |
|
|
|
1381 |
|
|
/**
|
1382 |
|
|
* @param area
|
1383 |
|
|
* @return
|
1384 |
|
|
*/
|
1385 |
|
|
private boolean isCubanArea(NamedArea area) {
|
1386 |
|
|
if (area.getUuid().equals(CubaTransformer.uuidCuba)){
|
1387 |
|
|
return true;
|
1388 |
|
|
}else if (area.getPartOf()!= null){
|
1389 |
|
|
return isCubanArea(area.getPartOf());
|
1390 |
|
|
}else{
|
1391 |
|
|
return false;
|
1392 |
|
|
}
|
1393 |
|
|
}
|
1394 |
|
|
|
1395 |
|
|
|
1396 |
|
|
/**
|
1397 |
fb3dec85
|
Andreas Müller
|
* @param record
|
1398 |
|
|
* @param state
|
1399 |
c9f78619
|
Andreas Müller
|
* @param familyTaxon
|
1400 |
|
|
* @param taxon
|
1401 |
|
|
*/
|
1402 |
|
|
private void makeAlternativeFamilies(HashMap<String, String> record,
|
1403 |
|
|
CubaImportState state,
|
1404 |
|
|
TaxonNode familyTaxon,
|
1405 |
|
|
Taxon taxon) {
|
1406 |
|
|
|
1407 |
|
|
String famFRC = record.get("Fam. FRC");
|
1408 |
|
|
String famAS = record.get("Fam. A&S");
|
1409 |
|
|
String famFC = record.get("Fam. FC");
|
1410 |
|
|
|
1411 |
8422c0cd
|
Andreas Müller
|
Reference refFRC = makeReference(state, CubaTransformer.uuidRefFRC);
|
1412 |
|
|
Reference refAS = makeReference(state, CubaTransformer.uuidRefAS);
|
1413 |
|
|
Reference refFC = makeReference(state, CubaTransformer.uuidRefFC);
|
1414 |
c9f78619
|
Andreas Müller
|
|
1415 |
|
|
makeSingleAlternativeFamily(state, taxon, famFRC, refFRC);
|
1416 |
|
|
makeSingleAlternativeFamily(state, taxon, famAS, refAS);
|
1417 |
|
|
makeSingleAlternativeFamily(state, taxon, famFC, refFC);
|
1418 |
|
|
}
|
1419 |
|
|
|
1420 |
|
|
|
1421 |
|
|
/**
|
1422 |
|
|
* @param state
|
1423 |
|
|
* @param uuidreffrc
|
1424 |
|
|
* @return
|
1425 |
|
|
*/
|
1426 |
8422c0cd
|
Andreas Müller
|
private Reference makeReference(CubaImportState state, UUID uuidRef) {
|
1427 |
|
|
Reference ref = state.getReference(uuidRef);
|
1428 |
c9f78619
|
Andreas Müller
|
if (ref == null){
|
1429 |
|
|
ref = getReferenceService().find(uuidRef);
|
1430 |
|
|
state.putReference(uuidRef, ref);
|
1431 |
|
|
}
|
1432 |
|
|
return ref;
|
1433 |
|
|
}
|
1434 |
|
|
|
1435 |
|
|
|
1436 |
|
|
/**
 * Stores the family a taxon is assigned to in another flora. The information is
 * stored in 3 ways:
 * <ol>
 * <li>as {@link TextData} of feature "Families in other Floras (Text)" having a
 *     source with <code>famRef</code> and the family name,</li>
 * <li>as {@link TaxonInteraction} of feature "Families in other Floras" with the
 *     family taxon as second taxon,</li>
 * <li>as taxon relationship of type {@link TaxonRelationshipType#INCLUDES()}
 *     added to the family taxon.</li>
 * </ol>
 * A blank family string is replaced by "-", so the data is also created
 * for rows without a value for the flora.
 *
 * @param state the import state
 * @param taxon the taxon the alternative family belongs to
 * @param famStr the family name string as used in the other flora, may be blank
 * @param famRef the reference representing the other flora
 * @throws RuntimeException if the transformer throws an
 *         {@link UndefinedTransformerMethodException} for the feature uuids
 */
private void makeSingleAlternativeFamily(CubaImportState state, Taxon taxon, String famStr, Reference famRef) {
    if (isBlank(famStr)){
        //blank values are handled as family "-" (the former early return is disabled)
        famStr = "-";
//        return;
    }

    //NOTE(review): presumably the boolean arguments let a missing description be created - confirm against getTaxonDescription
    TaxonDescription desc = getTaxonDescription(taxon, false, true);

    UUID altFamUuid1;
    UUID altFamUuid2;
    try {
        altFamUuid1 = state.getTransformer().getFeatureUuid("Alt.Fam.");
        altFamUuid2 = state.getTransformer().getFeatureUuid("Alt.Fam.2");
    } catch (UndefinedTransformerMethodException e) {
        throw new RuntimeException(e);
    }

    //family taxon having famRef passed at creation, reused for the same reference title and family string
    Taxon famTaxon = makeAlternativeFamilyTaxon(state, famStr, famRef);

    //TextData
    Feature feature1 = getFeature(state, altFamUuid1, "Families in other Floras (Text)", "Families in other Floras (Text)", "Other floras", null);
    //NOTE(review): the representation is added on each call; if getFeature returns the same
    //feature instance again this may result in repeated spanish representations - confirm
    feature1.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
    //former version storing the family string as text:
//    TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
    //the text data is created with null as text, the family is only referenced by the source
    TextData textData = TextData.NewInstance(feature1, null, Language.DEFAULT(), null);
    textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null, famTaxon.getName(),null);
    desc.addElement(textData);

    //TaxonInteraction
    Feature feature2 = getFeature(state, altFamUuid2, "Families in other Floras", "Families in other Floras", "Other floras(2)", null);
    feature2.setSupportsTaxonInteraction(true);
    //NOTE(review): same as for feature1, the representation is added on each call
    feature2.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
    TaxonInteraction taxInteract = TaxonInteraction.NewInstance(feature2);
    //NOTE(review): this sets the spanish text of the text data created above, not of the
    //taxon interaction - confirm that this is intended
    textData.putText(Language.SPANISH_CASTILIAN(), "Familias en otras Floras");
    taxInteract.setTaxon2(famTaxon);
    taxInteract.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null);
    desc.addElement(taxInteract);

    //Concept Relation
    //the relation is added to the family taxon, the sec reference of the taxon is passed as third argument
    famTaxon.addTaxonRelation(taxon, TaxonRelationshipType.INCLUDES(), taxon.getSec(), null);

}
|
1487 |
|
|
|
1488 |
|
|
|
1489 |
|
|
|
1490 |
|
|
|
1491 |
|
|
|
1492 |
|
|
/**
|
1493 |
|
|
* @param record
|
1494 |
|
|
* @param state
|
1495 |
fb3dec85
|
Andreas Müller
|
* @param taxon
|
1496 |
|
|
*/
|
1497 |
|
|
// "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1498 |
|
|
// "CuC","VC","Ci","SS","CA","Cam","LT",
|
1499 |
|
|
// "CuE","Gr","Ho","SC","Gu",
|
1500 |
|
|
private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
|
1501 |
|
|
List<String> areaKeys = Arrays.asList(new String[]{
|
1502 |
|
|
"CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
|
1503 |
|
|
"CuC","VC","Ci","SS","CA","Cam","LT",
|
1504 |
|
|
"CuE","Gr","Ho","SC","Gu",
|
1505 |
0a6a64c9
|
Andreas Müller
|
});
|
1506 |
|
|
for (String areaKey : areaKeys){
|
1507 |
|
|
state.setCubanProvince(true);
|
1508 |
|
|
makeSingleProvinceDistribution(areaKey, record, state);
|
1509 |
|
|
}
|
1510 |
|
|
}
|
1511 |
|
|
|
1512 |
|
|
private void makeOtherAreasDistribution(HashMap<String, String> record, CubaImportState state) {
|
1513 |
|
|
List<String> areaKeys = Arrays.asList(new String[]{
|
1514 |
fb3dec85
|
Andreas Müller
|
"Esp","Ja","PR","Men","Bah","Cay",
|
1515 |
|
|
"AmN","AmC","AmS","VM"});
|
1516 |
|
|
for (String areaKey : areaKeys){
|
1517 |
0a6a64c9
|
Andreas Müller
|
state.setCubanProvince(false);
|
1518 |
fb3dec85
|
Andreas Müller
|
makeSingleProvinceDistribution(areaKey, record, state);
|
1519 |
|
|
}
|
1520 |
|
|
}
|
1521 |
ede5c502
|
Andreas Müller
|
|
1522 |
fb3dec85
|
Andreas Müller
|
|
1523 |
ede5c502
|
Andreas Müller
|
|
1524 |
|
|
|
1525 |
fb3dec85
|
Andreas Müller
|
/**
|
1526 |
|
|
* @param areaKey
|
1527 |
|
|
* @param record
|
1528 |
|
|
* @param state
|
1529 |
0a6a64c9
|
Andreas Müller
|
* @param highestStatus
|
1530 |
fb3dec85
|
Andreas Müller
|
* @return
|
1531 |
|
|
* @throws UndefinedTransformerMethodException
|
1532 |
|
|
*/
|
1533 |
0a6a64c9
|
Andreas Müller
|
private PresenceAbsenceTerm makeProvinceStatus(String areaKey,
|
1534 |
|
|
HashMap<String, String> record,
|
1535 |
|
|
CubaImportState state) throws UndefinedTransformerMethodException {
|
1536 |
|
|
|
1537 |
fb3dec85
|
Andreas Müller
|
String statusStr = record.get(areaKey);
|
1538 |
|
|
if (statusStr == null){
|
1539 |
|
|
return null;
|
1540 |
b9cdcc88
|
Andreas Müller
|
}else{
|
1541 |
|
|
statusStr = statusStr.trim();
|
1542 |
fb3dec85
|
Andreas Müller
|
}
|
1543 |
|
|
PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
|
1544 |
|
|
if (status == null){
|
1545 |
b9cdcc88
|
Andreas Müller
|
// PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
|
1546 |
0a6a64c9
|
Andreas Müller
|
if (state.isCubanProvince() && isMinus(statusStr)){
|
1547 |
b9cdcc88
|
Andreas Müller
|
// getAbsenceTermForStatus(state, highestStatus);
|
1548 |
|
|
//we now handle cuban provinces same as external regions
|
1549 |
|
|
status = state.getTransformer().getPresenceTermByKey("--");
|
1550 |
0a6a64c9
|
Andreas Müller
|
}else if (! state.isCubanProvince() && isMinus(statusStr)){
|
1551 |
c9f78619
|
Andreas Müller
|
status = state.getTransformer().getPresenceTermByKey("--");
|
1552 |
0a6a64c9
|
Andreas Müller
|
}else{
|
1553 |
b9cdcc88
|
Andreas Müller
|
// logger.warn("Unhandled status str for provinces / external regions: " + statusStr);
|
1554 |
0a6a64c9
|
Andreas Müller
|
UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
|
1555 |
b9cdcc88
|
Andreas Müller
|
if (statusUuid == null){
|
1556 |
|
|
logger.error(state.getCurrentLine() + ": Undefined status str for provinces / external regions. No UUID given: '" + statusStr + "'");
|
1557 |
|
|
}else{
|
1558 |
|
|
status = getPresenceTerm(state, statusUuid, statusStr, statusStr, statusStr, false);
|
1559 |
|
|
}
|
1560 |
0a6a64c9
|
Andreas Müller
|
}
|
1561 |
|
|
}
|
1562 |
|
|
|
1563 |
|
|
return status;
|
1564 |
|
|
}
|
1565 |
|
|
|
1566 |
|
|
|
1567 |
|
|
/**
|
1568 |
|
|
* @param highestStatus
|
1569 |
|
|
* @throws UndefinedTransformerMethodException
|
1570 |
|
|
*/
|
1571 |
|
|
private PresenceAbsenceTerm getAbsenceTermForStatus(CubaImportState state, PresenceAbsenceTerm highestStatus) throws UndefinedTransformerMethodException {
|
1572 |
|
|
if (highestStatus == null){
|
1573 |
|
|
logger.warn(state.getCurrentLine() + ": Highest status not defined");
|
1574 |
|
|
return null;
|
1575 |
|
|
}
|
1576 |
|
|
PresenceAbsenceTerm result = null;
|
1577 |
|
|
if (highestStatus.equals(getStatus(state, "E"))){
|
1578 |
|
|
result = getStatus(state, "-E");
|
1579 |
|
|
}else if (highestStatus.getUuid().equals(state.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus.equals(PresenceAbsenceTerm.NATIVE())){
|
1580 |
|
|
result = getStatus(state, "-Ind.");
|
1581 |
|
|
}else if (highestStatus.equals(getStatus(state, "Ind.?"))){
|
1582 |
|
|
result = getStatus(state, "-Ind.?"); //TODO
|
1583 |
|
|
}else if (highestStatus.equals(getStatus(state, "N"))){
|
1584 |
|
|
result = getStatus(state, "-N");
|
1585 |
|
|
}else if (highestStatus.equals(getStatus(state, "P"))){
|
1586 |
|
|
result = getStatus(state, "-P");
|
1587 |
|
|
}else if (highestStatus.equals(getStatus(state, "A"))){
|
1588 |
|
|
result = getStatus(state, "-A");
|
1589 |
|
|
}else if (highestStatus.equals(getStatus(state, "C"))){
|
1590 |
|
|
result = getStatus(state, "-C");
|
1591 |
|
|
}
|
1592 |
|
|
logger.warn(state.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus.getTitleCache());
|
1593 |
|
|
return result;
|
1594 |
|
|
}
|
1595 |
|
|
|
1596 |
|
|
|
1597 |
|
|
/**
|
1598 |
|
|
* @param string
|
1599 |
|
|
* @return
|
1600 |
|
|
* @throws UndefinedTransformerMethodException
|
1601 |
|
|
*/
|
1602 |
|
|
private PresenceAbsenceTerm getStatus(CubaImportState state, String key) throws UndefinedTransformerMethodException {
|
1603 |
|
|
PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(key);
|
1604 |
|
|
if (status == null){
|
1605 |
|
|
UUID statusUuid = state.getTransformer().getPresenceTermUuid(key);
|
1606 |
fb3dec85
|
Andreas Müller
|
status = getPresenceTerm(state, statusUuid, null, null, null, false);
|
1607 |
|
|
}
|
1608 |
|
|
return status;
|
1609 |
|
|
}
|
1610 |
ede5c502
|
Andreas Müller
|
|
1611 |
|
|
|
1612 |
fb3dec85
|
Andreas Müller
|
/**
|
1613 |
ede5c502
|
Andreas Müller
|
* Stores parent-child, synonym and common name relationships
|
1614 |
|
|
*/
|
1615 |
|
|
@Override
|
1616 |
|
|
protected void secondPass(CubaImportState state) {
|
1617 |
|
|
// CyprusRow cyprusRow = state.getCyprusRow();
|
1618 |
|
|
return;
|
1619 |
|
|
}
|
1620 |
|
|
|
1621 |
|
|
|
1622 |
|
|
@Override
|
1623 |
|
|
protected boolean isIgnore(CubaImportState state) {
|
1624 |
|
|
return ! state.getConfig().isDoTaxa();
|
1625 |
|
|
}
|
1626 |
|
|
|
1627 |
|
|
@Override
|
1628 |
|
|
protected boolean doCheck(CubaImportState state) {
|
1629 |
|
|
logger.warn("DoCheck not yet implemented for CubaExcelImport");
|
1630 |
|
|
return true;
|
1631 |
|
|
}
|
1632 |
|
|
|
1633 |
|
|
}
|