Project

General

Profile

Download (66.9 KB) Statistics
| Branch: | Revision:
1 ede5c502 Andreas Müller
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
10
package eu.etaxonomy.cdm.io.cuba;
11
12 fb3dec85 Andreas Müller
import java.util.ArrayList;
13 ede5c502 Andreas Müller
import java.util.Arrays;
14
import java.util.HashMap;
15 c9f78619 Andreas Müller
import java.util.HashSet;
16 ede5c502 Andreas Müller
import java.util.List;
17
import java.util.Set;
18
import java.util.UUID;
19 fb3dec85 Andreas Müller
import java.util.regex.Matcher;
20
import java.util.regex.Pattern;
21 ede5c502 Andreas Müller
22
import org.apache.commons.lang.StringUtils;
23
import org.apache.log4j.Logger;
24
import org.springframework.stereotype.Component;
25
26
import eu.etaxonomy.cdm.common.CdmUtils;
27
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
29 b9cdcc88 Andreas Müller
import eu.etaxonomy.cdm.model.agent.Person;
30 fb3dec85 Andreas Müller
import eu.etaxonomy.cdm.model.agent.Team;
31
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.common.Annotation;
33
import eu.etaxonomy.cdm.model.common.AnnotationType;
34 b9cdcc88 Andreas Müller
import eu.etaxonomy.cdm.model.common.CdmBase;
35
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
36 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.common.Language;
37 c9f78619 Andreas Müller
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
38 4b9c9c4b Andreas Müller
import eu.etaxonomy.cdm.model.common.Representation;
39 b9cdcc88 Andreas Müller
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
40
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
41 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.description.Distribution;
42 c9f78619 Andreas Müller
import eu.etaxonomy.cdm.model.description.Feature;
43 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
44
import eu.etaxonomy.cdm.model.description.TaxonDescription;
45 c9f78619 Andreas Müller
import eu.etaxonomy.cdm.model.description.TaxonInteraction;
46
import eu.etaxonomy.cdm.model.description.TextData;
47 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.location.NamedArea;
48 b9cdcc88 Andreas Müller
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
49 7d882578 Andreas Müller
import eu.etaxonomy.cdm.model.name.IBotanicalName;
50 b9cdcc88 Andreas Müller
import eu.etaxonomy.cdm.model.name.NameRelationship;
51 fb3dec85 Andreas Müller
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
52 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
53 fb3dec85 Andreas Müller
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
54
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
55 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.name.Rank;
56 86536e03 Andreas Müller
import eu.etaxonomy.cdm.model.name.TaxonName;
57 ded3de15 Andreas Müller
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
58 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.reference.Reference;
59 fb3dec85 Andreas Müller
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
60 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.taxon.Classification;
61 fb3dec85 Andreas Müller
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
62 3ef2e1bd Andreas Müller
import eu.etaxonomy.cdm.model.taxon.Synonym;
63
import eu.etaxonomy.cdm.model.taxon.SynonymType;
64 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.model.taxon.Taxon;
65
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
66 c9f78619 Andreas Müller
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
67 6af76d56 Andreas Müller
import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
68 ede5c502 Andreas Müller
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
69
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
70
71
/**
72
 * @author a.mueller
73
 * @created 05.01.2016
74
 */
75
76
@Component
77
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
78
    private static final long serialVersionUID = -747486709409732371L;
79
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
80
81 5cdaf78e Andreas Müller
    private static final String HOMONYM_MARKER = "\\s+homon.?$";
82 fb3dec85 Andreas Müller
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
83
84
85
    private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
86
    private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
87
88 ede5c502 Andreas Müller
    private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
89
    private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
90
91 c9f78619 Andreas Müller
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
92
            "Fam. default","Fam. FRC","Fam. A&S","Fam. FC",
93
            "Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
94 ede5c502 Andreas Müller
95
	@Override
96
    protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
97 fb3dec85 Andreas Müller
	    //we do everything in firstPass here
98 ede5c502 Andreas Müller
    	return;
99
    }
100
101
102
    /**
103
     * @param record
104
     * @param state
105
     * @param taxon
106
     */
107 fb3dec85 Andreas Müller
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
108 ede5c502 Andreas Müller
        try {
109 4b9c9c4b Andreas Müller
            NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("Cu"), null, null, null, null, null);
110 fb3dec85 Andreas Müller
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
111 0a6a64c9 Andreas Müller
            List<PresenceAbsenceTerm> statuss =  makeCubanStatuss(record, state);
112 fb3dec85 Andreas Müller
            for (PresenceAbsenceTerm status : statuss){
113
                Distribution distribution = Distribution.NewInstance(cuba, status);
114
                desc.addElement(distribution);
115 b9cdcc88 Andreas Müller
                distribution.addSource(makeDescriptionSource(state));
116 fb3dec85 Andreas Müller
            }
117 ede5c502 Andreas Müller
        } catch (UndefinedTransformerMethodException e) {
118
            e.printStackTrace();
119
        }
120
    }
121
122
123
    /**
124
     * @param record
125
     * @param state
126
     * @return
127
     * @throws UndefinedTransformerMethodException
128
     */
129 0a6a64c9 Andreas Müller
    private List<PresenceAbsenceTerm> makeCubanStatuss(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
130
        PresenceAbsenceTerm highestStatus = null;
131 ede5c502 Andreas Müller
132 fb3dec85 Andreas Müller
        String line = state.getCurrentLine() + ": ";
133
        List<PresenceAbsenceTerm> result = new ArrayList<>();
134
135 ede5c502 Andreas Müller
        String endemicStr = getValue(record, "End");
136
        String indigenousStr = getValue(record, "Ind");
137
        String indigenousDoubtStr = getValue(record, "Ind? D");
138
        String naturalisedStr = getValue(record, "Nat");
139
        String dudStr = getValue(record, "Dud P");
140
        String advStr = getValue(record, "Adv");
141
        String cultStr = getValue(record, "Cult C");
142
143 b9cdcc88 Andreas Müller
        state.setEndemic(false);
144
145 ede5c502 Andreas Müller
        if (endemicStr != null){
146 fb3dec85 Andreas Müller
            if(endemicStr.equals("+")){
147
                PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
148
                result.add(endemicState);
149 0a6a64c9 Andreas Müller
                highestStatus = endemicState;
150 b9cdcc88 Andreas Müller
                state.setEndemic(true);
151 fb3dec85 Andreas Müller
            }else if(isMinus(endemicStr)){
152
                UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
153
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
154
                result.add(endemicState);
155 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "endemic", false);
156 c9f78619 Andreas Müller
            }else if(endemicStr.equals("?")){
157
                UUID endemicDoubtfulUuid = state.getTransformer().getPresenceTermUuid("?E");
158
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicDoubtfulUuid, null, null, null, false);
159
                result.add(endemicState);
160
                checkAbsentHighestState(highestStatus, line, "endemic", false);
161 ede5c502 Andreas Müller
            }else{
162 fb3dec85 Andreas Müller
                logger.warn(line + "Endemic not recognized: " + endemicStr);
163 ede5c502 Andreas Müller
            }
164 fb3dec85 Andreas Müller
        }
165
        if (indigenousStr != null){
166
            if(indigenousStr.equals("+")){
167 b9cdcc88 Andreas Müller
                PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("Ind.");
168
//                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
169 fb3dec85 Andreas Müller
                result.add(indigenousState);
170 0a6a64c9 Andreas Müller
                highestStatus = highestStatus != null ? highestStatus : indigenousState;
171 fb3dec85 Andreas Müller
            }else if(isMinus(indigenousStr)){
172 0a6a64c9 Andreas Müller
                PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("-Ind.");
173
                result.add(indigenousState);
174
                checkAbsentHighestState(highestStatus, line, "indigenous", false);
175 fb3dec85 Andreas Müller
            }else if(indigenousStr.equals("?")){
176 b9cdcc88 Andreas Müller
                PresenceAbsenceTerm indigenousDoubtState = state.getTransformer().getPresenceTermByKey("?Ind.");
177
//                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
178 fb3dec85 Andreas Müller
                result.add(indigenousDoubtState);
179 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "indigenous", true);
180 ede5c502 Andreas Müller
            }else{
181 fb3dec85 Andreas Müller
                logger.warn(line + "Indigenous not recognized: " + indigenousStr);
182
            }
183
        }
184
        if(indigenousDoubtStr != null){
185
            if(indigenousDoubtStr.equals("D")){
186 b9cdcc88 Andreas Müller
                PresenceAbsenceTerm doubtIndigenousState = state.getTransformer().getPresenceTermByKey("Ind.?");
187
//                PresenceAbsenceTerm doubtIndigenousState = getPresenceTerm(state, doubtIndigenousUuid, null, null, null, false);
188
                result.add(doubtIndigenousState);
189
                highestStatus = highestStatus != null ? highestStatus : doubtIndigenousState;
190
            }else if(isMinus(indigenousDoubtStr)){
191
                UUID doubtIndigenousErrorUuid = state.getTransformer().getPresenceTermUuid("-Ind.?");
192
                PresenceAbsenceTerm doubtIndigenousErrorState = getPresenceTerm(state, doubtIndigenousErrorUuid, null, null, null, false);
193
                result.add(doubtIndigenousErrorState);
194
                checkAbsentHighestState(highestStatus, line, "doubtfully indigenous", true);
195 fb3dec85 Andreas Müller
            }else{
196 b9cdcc88 Andreas Müller
                logger.warn(line + "doubtfully indigenous not recognized: " + indigenousDoubtStr);
197 fb3dec85 Andreas Müller
            }
198
        }
199
        if(naturalisedStr != null){
200
            if(naturalisedStr.equals("N")){
201 0a6a64c9 Andreas Müller
                PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
202
                result.add(haturalizedState);
203
                highestStatus = highestStatus != null ? highestStatus : haturalizedState;
204 fb3dec85 Andreas Müller
            }else if(isMinus(naturalisedStr)){
205
                UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
206
                PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
207
                result.add(naturalisedErrorState);
208 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "naturalized", false);
209 fb3dec85 Andreas Müller
            }else if(naturalisedStr.equals("?")){
210
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
211
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
212
                result.add(naturalisedDoubtState);
213 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "naturalized", true);
214 fb3dec85 Andreas Müller
            }else{
215
                logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
216
            }
217
        }
218
        if(dudStr != null){
219
            if(dudStr.equals("P")){
220
                UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
221
                PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
222
                result.add(dudState);
223 0a6a64c9 Andreas Müller
                highestStatus = highestStatus != null ? highestStatus : dudState;
224 fb3dec85 Andreas Müller
            }else if(isMinus(dudStr)){
225
                UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
226
                PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
227
                result.add(nonNativeErrorState);
228 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", false);
229 fb3dec85 Andreas Müller
            }else if(dudStr.equals("?")){
230
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
231
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
232
                result.add(naturalisedDoubtState);
233 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", true);
234 fb3dec85 Andreas Müller
            }else{
235
                logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
236
            }
237
        }
238
        if(advStr != null){
239
            if(advStr.equals("A")){
240 b9cdcc88 Andreas Müller
                PresenceAbsenceTerm advState = state.getTransformer().getPresenceTermByKey("Adv.");
241
//                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
242 fb3dec85 Andreas Müller
                result.add(advState);
243 0a6a64c9 Andreas Müller
                highestStatus = highestStatus != null ? highestStatus : advState;
244 fb3dec85 Andreas Müller
            }else if(isMinus(advStr)){
245
                UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
246
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
247
                result.add(advState);
248 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "adventive", false);
249 b9cdcc88 Andreas Müller
            }else if(advStr.equals("(A)")){
250
                UUID rareCasualUuid = state.getTransformer().getPresenceTermUuid("(A)");
251
                PresenceAbsenceTerm rareCasual = getPresenceTerm(state, rareCasualUuid, null, null, null, false);
252
                result.add(rareCasual);
253 fb3dec85 Andreas Müller
            }else{
254
                logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
255
            }
256
        }else if(cultStr != null){
257
            if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
258
                logger.warn("'cultivated' not recognized: " + cultStr);
259
            }else if(cultStr.equals("C")){
260
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
261
                result.add(cultivatedState);
262 0a6a64c9 Andreas Müller
                highestStatus = highestStatus != null ? highestStatus : cultivatedState;
263 fb3dec85 Andreas Müller
            }else if(cultStr.equals("?")){
264
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
265
                result.add(cultivatedState);
266 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "cultivated", true);
267 fb3dec85 Andreas Müller
            }else if(cultStr.equals("(C)")){
268
                UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
269
                PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
270
                result.add(cultivatedState);
271
            }else if(isMinus(cultStr)){
272
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
273
                result.add(cultivatedState);
274 0a6a64c9 Andreas Müller
                checkAbsentHighestState(highestStatus, line, "cultivated", false);
275 fb3dec85 Andreas Müller
            }else{
276
                logger.warn(line + "'cultivated' not recognized: " + cultStr);
277 ede5c502 Andreas Müller
            }
278
        }
279 0a6a64c9 Andreas Müller
        state.setHighestStatusForTaxon(highestStatus);
280 fb3dec85 Andreas Müller
        return result;
281
    }
282
283
284 0a6a64c9 Andreas Müller
    /**
285
     * @param highestStatus
286
     * @param line
287
     */
288
    private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus, String line, String stateLabel, boolean doubtful) {
289 b9cdcc88 Andreas Müller
        //can be removed, highest status is not used anymore
290 0a6a64c9 Andreas Müller
        if (highestStatus == null){
291
            String absentStr = doubtful ? "doubtful" : "absent";
292 b9cdcc88 Andreas Müller
            logger.info(line + "Highest cuban state is " + absentStr + " " + stateLabel);
293 0a6a64c9 Andreas Müller
        }
294
295
    }
296
297
298 fb3dec85 Andreas Müller
    /**
299
     * @param indigenousStr
300
     * @return
301
     */
302
    private boolean isMinus(String str) {
303 b9cdcc88 Andreas Müller
        return str.equals("-") || str.equals("–") || str.equals("‒");
304 ede5c502 Andreas Müller
    }
305
306
307
    /**
308
     * @param indigenousStr
309
     * @return
310
     */
311 fb3dec85 Andreas Müller
    private boolean checkPlusMinusDoubt(String str) {
312
        return str.equals("+") || isMinus(str)|| str.equals("?");
313 ede5c502 Andreas Müller
    }
314
315
316
    /**
317
     * @param indigenousStr
318
     * @param indigenousDoubtStr
319
     * @param naturalisedStr
320
     * @param dudStr
321
     * @param advStr
322
     * @param cultStr
323
     */
324
    private boolean checkAllNull(String ... others) {
325
        for (String other : others){
326
            if (other != null){
327
                return false;
328
            }
329
        }
330
        return true;
331
    }
332
333
334 fb3dec85 Andreas Müller
    private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
335
//    String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
336
//                    + "(\\((.{6,})\\))?";
337
    private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
338
                                                     +"(\\((.{6,})\\))?";
339
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
340
            +"(\\((.{6,})\\))?";
341 b0b606e6 Andreas Müller
    private static final String auctRegExStr = "auct\\."
342 b9cdcc88 Andreas Müller
            +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sCombs|\\sBritton|\\sGriseb\\.(\\sFC-S|\\sA&S)?|\\sWright"
343 c9f78619 Andreas Müller
            + "|\\sHammer|\\sEngl\\.||\\sMaza|\\sMiers|\\sRoig|\\sBorhidi|\\sFRC|\\sCoL"
344
            + "|\\sAckerman|\\sMújica|\\sDíaz|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
345
346
347 5cdaf78e Andreas Müller
    private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
348 c9f78619 Andreas Müller
    private static final String sphalmRegExStr = "“(.*{5,})”\\s+((FC-S|A&S)\\s)?sphalm\\.(\\s(FC(-S)?|A&S|inval\\.))?";
349
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.(\\s(West|Moldenke|FC|Jacq.))?";
350 fb3dec85 Andreas Müller
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
351
352
    private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
353
    private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
354
    private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
355
    private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
356 c9f78619 Andreas Müller
    private static final Pattern sphalmRegEx = Pattern.compile(sphalmRegExStr);
357 fb3dec85 Andreas Müller
358 ede5c502 Andreas Müller
    /**
359
     * @param record
360
     * @param state
361
     * @param taxon
362
     */
363 b9cdcc88 Andreas Müller
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state, boolean isFirstSynonym) {
364 fb3dec85 Andreas Müller
//        boolean forAccepted = true;
365
        String synonymStr = record.get("Syn.");
366
        String line = state.getCurrentLine() + ": ";
367
368 b9cdcc88 Andreas Müller
369 fb3dec85 Andreas Müller
        if (synonymStr == null){
370
            //TODO test that this is not a synonym only line
371
            return;
372
        }
373 b9cdcc88 Andreas Müller
374
        if (state.getCurrentTaxon() == null){
375
            logger.error(line + "Current taxon is null for synonym");
376
            return;
377
        }
378
379
380 fb3dec85 Andreas Müller
        synonymStr = synonymStr.trim();
381 b9cdcc88 Andreas Müller
        synonymStr = synonymStr.replace("[taxon]", "[infraspec.]");
382 fb3dec85 Andreas Müller
383
//        String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
384
//        String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
385
386
//        Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
387
388 b9cdcc88 Andreas Müller
389 fb3dec85 Andreas Müller
        Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
390
        Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
391
        Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
392
        Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
393 c9f78619 Andreas Müller
        Matcher sphalmMatcher = sphalmRegEx.matcher(synonymStr);
394 fb3dec85 Andreas Müller
395 7d882578 Andreas Müller
        List<IBotanicalName> homonyms = new ArrayList<>();
396 fb3dec85 Andreas Müller
        if (missapliedMatcher.matches()){
397 5cdaf78e Andreas Müller
            boolean doubtful = missapliedMatcher.group(1) != null;
398
            String firstPart = missapliedMatcher.group(2);
399 86536e03 Andreas Müller
            IBotanicalName name = (IBotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
400 b9cdcc88 Andreas Müller
            name.addSource(makeOriginalSource(state));
401 fb3dec85 Andreas Müller
402 5cdaf78e Andreas Müller
            String secondPart = missapliedMatcher.group(3);
403 fb3dec85 Andreas Müller
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
404 b9cdcc88 Andreas Müller
            misappliedNameTaxon.addSource(makeOriginalSource(state));
405 5cdaf78e Andreas Müller
            misappliedNameTaxon.setDoubtful(doubtful);
406 fb3dec85 Andreas Müller
            if (secondPart.startsWith("sensu")){
407
                secondPart = secondPart.substring(5).trim();
408
                if (secondPart.contains(" ")){
409 5cdaf78e Andreas Müller
                    logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
410 fb3dec85 Andreas Müller
                }
411 8422c0cd Andreas Müller
                Reference sensu = ReferenceFactory.newGeneric();
412 fb3dec85 Andreas Müller
                Team team = Team.NewTitledInstance(secondPart, null);
413
                sensu.setAuthorship(team);
414
                misappliedNameTaxon.setSec(sensu);
415 b0b606e6 Andreas Müller
            }else if (secondPart.matches(auctRegExStr)){
416 fb3dec85 Andreas Müller
                secondPart = secondPart.replace("p. p.", "p.p.");
417
                misappliedNameTaxon.setAppendedPhrase(secondPart);
418
            }else{
419
                logger.warn(line + "Misapplied second part not recognized: " + secondPart);
420
            }
421
            //TODO
422 8422c0cd Andreas Müller
            Reference relRef = null;
423 fb3dec85 Andreas Müller
            state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
424
        }else if (nomInvalMatcher.matches()){
425
            String firstPart = nomInvalMatcher.group(1);
426 c9f78619 Andreas Müller
            String afterInval = nomInvalMatcher.group(2);
427
            if (StringUtils.isNotBlank(afterInval)){
428
                logger.warn(state.getCurrentLine() + ": After inval to be implemented: " + afterInval);
429
            }
430 86536e03 Andreas Müller
            TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
431 b9cdcc88 Andreas Müller
            name.addSource(makeOriginalSource(state));
432 fb3dec85 Andreas Müller
            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
433
            name.addStatus(status);
434 3ef2e1bd Andreas Müller
            Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
435
            syn.addSource(makeOriginalSource(state));
436 c9f78619 Andreas Müller
        }else if (sphalmMatcher.matches()){
437
            String firstPart = sphalmMatcher.group(1);
438
            String sphalmPart = synonymStr.replace(firstPart, "").replace("“","").replace("”","").trim();
439 86536e03 Andreas Müller
            TaxonName name = (TaxonName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
440 c9f78619 Andreas Müller
//            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
441
//            name.addStatus(status);
442 b9cdcc88 Andreas Müller
            name.addSource(makeOriginalSource(state));
443 3ef2e1bd Andreas Müller
            Synonym syn = state.getCurrentTaxon().addSynonymName(name, SynonymType.SYNONYM_OF());
444
            syn.setAppendedPhrase(sphalmPart);
445
            syn.setSec(null);
446
            syn.addSource(makeOriginalSource(state));
447 fb3dec85 Andreas Müller
        }else if (acceptedMatcher.matches()){
448
            String firstPart = acceptedMatcher.group(1);
449
            String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
450 7d882578 Andreas Müller
            List<IBotanicalName> list = handleHomotypicGroup(firstPart, state, state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
451 b9cdcc88 Andreas Müller
            checkFirstSynonym(state, list, isFirstSynonym, synonymStr, false);
452 fb3dec85 Andreas Müller
        }else if(heterotypicMatcher.matches()){
453
            String firstPart = heterotypicMatcher.group(1).trim();
454
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
455
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
456
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
457 c9f78619 Andreas Müller
            firstPart = replaceHomonIlleg(firstPart);
458
            boolean isHomonym = firstPart.matches(".*" + HOMONYM_MARKER);
459 86536e03 Andreas Müller
            TaxonName synName = (TaxonName)makeName(state, firstPart);
460 fb3dec85 Andreas Müller
            if (synName.isProtectedTitleCache()){
461 b9cdcc88 Andreas Müller
                logger.warn(line + "Heterotypic base synonym could not be parsed correctly: " + firstPart);
462 fb3dec85 Andreas Müller
            }
463
            if (isHomonym){
464
                homonyms.add(synName);
465
            }
466 3ef2e1bd Andreas Müller
            Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
467
            syn.setDoubtful(isDoubtful);
468
            syn.addSource(makeOriginalSource(state));
469 7d882578 Andreas Müller
            List<IBotanicalName> list = handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
470 b9cdcc88 Andreas Müller
            checkFirstSynonym(state, list, isFirstSynonym, synonymStr, true);
471
472
        }else if (isSpecialHeterotypic(synonymStr)){
473 86536e03 Andreas Müller
            TaxonName synName = (TaxonName)makeName(state, synonymStr);
474 b9cdcc88 Andreas Müller
            if (synName.isProtectedTitleCache()){
475
                logger.warn(line + "Special heterotypic synonym could not be parsed correctly:" + synonymStr);
476
            }
477 3ef2e1bd Andreas Müller
            Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
478
            syn.addSource(makeOriginalSource(state));
479 fb3dec85 Andreas Müller
        }else{
480
            logger.warn(line + "Synonym entry does not match: " + synonymStr);
481
        }
482
    }
483
484 b9cdcc88 Andreas Müller
    /**
485
     * @param state
486
     * @param list
487
     * @param isFirstSynonym
488
     * @param synonymStr
489
     * @param b
490
     */
491 7d882578 Andreas Müller
    private void checkFirstSynonym(CubaImportState state, List<IBotanicalName> list, boolean isFirstSynonym, String synonymStr, boolean isHeterotypicMatcher) {
492 b9cdcc88 Andreas Müller
        if (!isFirstSynonym){
493
            return;
494
        }
495
        String line = state.getCurrentLine() + ": ";
496 7d882578 Andreas Müller
        IBotanicalName currentName = isHeterotypicMatcher? (IBotanicalName)state.getCurrentTaxon().getName(): list.get(0);
497 b9cdcc88 Andreas Müller
        boolean currentHasBasionym = currentName.getBasionymAuthorship() != null;
498 7d882578 Andreas Müller
        IBotanicalName firstSynonym = isHeterotypicMatcher ? list.get(0): list.get(1);
499 b9cdcc88 Andreas Müller
//        if (list.size() <= 1){
500
//            logger.error(line + "homotypic list size is 1 but shouldn't");
501
//            return;
502
//        }
503
        if (isHeterotypicMatcher && currentHasBasionym){
504
            logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has basionym author but has no homotypic basionym , but : " + synonymStr);
505
        }else if (isHeterotypicMatcher){
506
            //first synonym must not have a basionym author
507
            if (firstSynonym.getBasionymAuthorship() != null){
508
                logger.error(line + "Current taxon (" + currentName.getTitleCache() + ") has no basionym but first synonym requires basionym : " + synonymStr);
509
            }
510
        }else{  //isAcceptedMatcher
511
            if (currentHasBasionym){
512
                if (! matchAuthor(currentName.getBasionymAuthorship(), firstSynonym.getCombinationAuthorship())){
513
                    logger.info(line + "Current basionym author and first synonym combination author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
514
                }
515
            }else{
516
                if (! matchAuthor(currentName.getCombinationAuthorship(), firstSynonym.getBasionymAuthorship())){
517
                    logger.info(line + "Current combination author and first synonym basionym author do not match: " + currentName.getTitleCache() + "<->" + firstSynonym.getTitleCache());
518
                }
519
            }
520
        }
521
522
    }
523
524
525
    /**
526
     * @param synonymStr
527
     * @return
528
     */
529
    private boolean isSpecialHeterotypic(String synonymStr) {
530
        if (synonymStr == null){
531
            return false;
532
        }else if (synonymStr.equals("Rhynchospora prenleloupiana (‘prenteloupiana’) Boeckeler")){
533
            return true;
534
        }else if (synonymStr.equals("Psidium longipes var. orbiculare (O.Berg) McVaugh")){
535
            return true;
536
        }
537
        return false;
538
    }
539
540
541
    /**
542
     * @param areaKey
543
     * @param record
544
     * @param state
545
     * @param taxon
546
     */
547
    private void makeSingleProvinceDistribution(String areaKey,
548
            HashMap<String, String> record,
549
            CubaImportState state) {
550
        try {
551
            UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
552
            if (areaUuid == null){
553
                logger.warn("Area not recognized: " + areaKey);
554
                return;
555
            }
556
            if (record.get(areaKey)==null){
557
                return; //no status defined
558
            }
559
560
            NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
561
            if (area == null){
562
                logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
563
            }
564
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
565
            PresenceAbsenceTerm status =  makeProvinceStatus(areaKey, record, state);
566
            if (status == null){
567
                logger.warn(state.getCurrentLine() + ": Province distribution status could not be defined: " + record.get(areaKey));
568
            }
569
            Distribution distribution = Distribution.NewInstance(area, status);
570
            desc.addElement(distribution);
571
            distribution.addSource(makeDescriptionSource(state));
572
        } catch (UndefinedTransformerMethodException e) {
573
            e.printStackTrace();
574
        }
575
576
    }
577 fb3dec85 Andreas Müller
578
579
    /**
580
     * @param synonymStr
581
     * @param state
582
     * @param homonyms
583
     * @param homonymPart
584
     * @param isDoubtful
585
     * @param taxon
586
     * @param homotypicalGroup
587
     */
588 7d882578 Andreas Müller
    private List<IBotanicalName> handleHomotypicGroup(String homotypicStrOrig,
589 fb3dec85 Andreas Müller
            CubaImportState state,
590 7d882578 Andreas Müller
            IBotanicalName homotypicName,
591 fb3dec85 Andreas Müller
            boolean isHeterotypic,
592 7d882578 Andreas Müller
            List<IBotanicalName> homonyms,
593 fb3dec85 Andreas Müller
            String homonymPart,
594
            boolean isDoubtful) {
595
596 7d882578 Andreas Müller
        List<IBotanicalName> homotypicNameList = new ArrayList<>();
597 b9cdcc88 Andreas Müller
        homotypicNameList.add(homotypicName);
598
599
        String homotypicStr = homotypicStrOrig;
600 fb3dec85 Andreas Müller
        if (homotypicStr == null){
601 b9cdcc88 Andreas Müller
            return homotypicNameList;
602 fb3dec85 Andreas Müller
        }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
603
            homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
604
        }
605
606 b9cdcc88 Andreas Müller
        HomotypicalGroup homotypicGroup = homotypicName.getHomotypicalGroup();
607 fb3dec85 Andreas Müller
        String[] splits = homotypicStr.split("\\s*,\\s*");
608
        for (String split : splits){
609 c9f78619 Andreas Müller
            split = replaceHomonIlleg(split);
610
            boolean isHomonym = split.matches(".*" + HOMONYM_MARKER);
611 ea7deae0 Andreas Müller
            TaxonName newName = (TaxonName)makeName(state, split);
612 b9cdcc88 Andreas Müller
            newName.setHomotypicalGroup(homotypicGroup);  //not really necessary as this is later set anyway
613 fb3dec85 Andreas Müller
            if (newName.isProtectedTitleCache()){
614
                logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
615
            }
616
            if (isHomonym){
617
                homonyms.add(newName);
618
            }
619
            if (isHeterotypic){
620 3ef2e1bd Andreas Müller
                Synonym syn = state.getCurrentTaxon().addHeterotypicSynonymName(newName, null, null, homotypicGroup);
621
                syn.setDoubtful(isDoubtful);
622
                syn.addSource(makeOriginalSource(state));
623 fb3dec85 Andreas Müller
//                newName.addBasionym(homotypicName);
624
            }else{
625 3ef2e1bd Andreas Müller
                state.getCurrentTaxon().addHomotypicSynonymName(newName);
626 fb3dec85 Andreas Müller
            }
627 b9cdcc88 Andreas Müller
            handleBasionym(state, homotypicNameList, homonyms, newName);
628
            homotypicNameList.add(newName);
629 fb3dec85 Andreas Müller
        }
630 b9cdcc88 Andreas Müller
        makeHomonyms(homonyms, homonymPart, state, homotypicGroup);
631
        return homotypicNameList;
632 c9f78619 Andreas Müller
    }
633
634
635
    /**
636
     * @param split
637
     * @return
638
     */
639
    private String replaceHomonIlleg(String split) {
640
        String result = split.trim().replace("homon. illeg.", "nom. illeg. homon.").trim();
641
        return result;
642 fb3dec85 Andreas Müller
    }
643
644
645
    /**
646
     * @param homonyms
647
     * @param homonymPart
648
     * @param state
649 c9f78619 Andreas Müller
     * @param currentBasionym
650 fb3dec85 Andreas Müller
     */
651 7d882578 Andreas Müller
    private void makeHomonyms(List<IBotanicalName> homonyms, String homonymPartOrig, CubaImportState state,
652 b9cdcc88 Andreas Müller
            HomotypicalGroup homotypicGroup) {
653 fb3dec85 Andreas Müller
        String line = state.getCurrentLine() + ": ";
654 c9f78619 Andreas Müller
        String homonymPart = homonymPartOrig == null ? "" : homonymPartOrig.trim();
655 fb3dec85 Andreas Müller
        if (homonyms.isEmpty() && homonymPart.equals("")){
656
            return;
657
        }else if (homonymPart.equals("")){
658
            logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
659
            return;
660
        }
661
        homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
662
        String[] splits = homonymPart.split("\\]\\s*\\[");
663
        if (splits.length != homonyms.size()){
664 c9f78619 Andreas Müller
            if(homonyms.size() == 0 && splits.length >= 1){
665 b9cdcc88 Andreas Müller
                handleSimpleBlockingNames(splits, state, homotypicGroup);
666 c9f78619 Andreas Müller
            }else{
667
                logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
668
            }
669 fb3dec85 Andreas Müller
            return;
670
        }
671
        int i = 0;
672
        for (String split : splits){
673
            split = split.replaceAll("^non\\s+", "");
674 86536e03 Andreas Müller
            TaxonName newName = (TaxonName)makeName(state, split);
675 c9f78619 Andreas Müller
//            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
676 fb3dec85 Andreas Müller
            if (newName.isProtectedTitleCache()){
677
                logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
678
            }
679 b9cdcc88 Andreas Müller
            homonyms.get(i).addRelationshipToName(newName, NameRelationshipType.LATER_HOMONYM(), null);
680 fb3dec85 Andreas Müller
            i++;
681
        }
682
    }
683
684 c9f78619 Andreas Müller
    /**
685
     * @param homonymPart
686
     * @param state
687 b9cdcc88 Andreas Müller
     * @param homotypicGroup
688 c9f78619 Andreas Müller
     */
689 b9cdcc88 Andreas Müller
    private void handleSimpleBlockingNames(String[] splitsi,
690
            CubaImportState state,
691
            HomotypicalGroup homotypicGroup) {
692 7d882578 Andreas Müller
        List<IBotanicalName> replacementNameCandidates = new ArrayList<>();
693 c9f78619 Andreas Müller
        for (String spliti : splitsi){
694
695
            String split = spliti.replaceAll("^non\\s+", "");
696 86536e03 Andreas Müller
            IBotanicalName newName = makeName(state, split);
697 c9f78619 Andreas Müller
            if (newName.isProtectedTitleCache()){
698
                logger.warn(state.getCurrentLine() + ": blocking name could not be parsed: " + split);
699
            }
700 7d882578 Andreas Müller
            Set<IBotanicalName> typifiedNames = (Set)homotypicGroup.getTypifiedNames();
701
            Set<IBotanicalName> candidates = new HashSet<>();
702
            for (IBotanicalName name : typifiedNames){
703 c9f78619 Andreas Müller
                if (name.getGenusOrUninomial() != null && name.getGenusOrUninomial().equals(newName.getGenusOrUninomial())){
704
                    if (name.getStatus().isEmpty() || ! name.getStatus().iterator().next().getType().equals(NomenclaturalStatusType.ILLEGITIMATE())){
705
                        candidates.add(name);
706
                    }
707
                }
708
            }
709
            if (candidates.size() == 1){
710 ea7deae0 Andreas Müller
                TaxonName blockedName = (TaxonName)candidates.iterator().next();
711 b9cdcc88 Andreas Müller
                newName.addRelationshipToName(blockedName, NameRelationshipType.BLOCKING_NAME_FOR(), null);
712
                replacementNameCandidates.add(blockedName);
713 c9f78619 Andreas Müller
            }else{
714
                logger.warn(state.getCurrentLine() + ": Blocking name could not be handled. " + candidates.size() + " candidates.");
715
            }
716
        }
717 b9cdcc88 Andreas Müller
        makeReplacedSynonymIfPossible(state, homotypicGroup, replacementNameCandidates);
718
    }
719
720
    /**
721
     * @param homotypicGroup
722
     * @param replacementNameCandidates
723
     */
724
    private void makeReplacedSynonymIfPossible(CubaImportState state,
725
            HomotypicalGroup homotypicGroup,
726 7d882578 Andreas Müller
            List<IBotanicalName> replacementNameCandidates) {
727 b9cdcc88 Andreas Müller
        String line = state.getCurrentLine() +": ";
728 86536e03 Andreas Müller
        List<IBotanicalName> replacedCandidates = new ArrayList<>();
729 ea7deae0 Andreas Müller
        for (TaxonName typifiedName : homotypicGroup.getTypifiedNames()){
730 86536e03 Andreas Müller
            IBotanicalName candidate = typifiedName;
731 b9cdcc88 Andreas Müller
            if (candidate.getBasionymAuthorship() == null){
732
                if (candidate.getStatus().isEmpty()){
733
                    if (! replacementNameCandidates.contains(candidate)){
734
                        replacedCandidates.add(candidate);
735
                    }
736
                }
737
            }
738
        }
739
        if (replacedCandidates.size() == 1){
740 ea7deae0 Andreas Müller
            TaxonName replacedSynonym = (TaxonName)replacedCandidates.iterator().next();
741 7d882578 Andreas Müller
            for (IBotanicalName replacementName : replacementNameCandidates){
742 b9cdcc88 Andreas Müller
                replacementName.addReplacedSynonym(replacedSynonym, null, null, null);
743
            }
744
        }else if (replacedCandidates.size() < 1){
745
            logger.warn(line + "No replaced synonym candidate found");
746
        }else{
747
            logger.warn(line + "More than 1 ("+replacedCandidates.size()+") replaced synonym candidates found");
748
        }
749 c9f78619 Andreas Müller
    }
750
751
752 fb3dec85 Andreas Müller
    /**
753 b9cdcc88 Andreas Müller
     * @param homotypicGroup
754 fb3dec85 Andreas Müller
     * @param newName
755 b9cdcc88 Andreas Müller
     */
756 7d882578 Andreas Müller
    private void handleBasionym(CubaImportState state, List<IBotanicalName> homotypicNameList,
757
            List<IBotanicalName> homonyms, IBotanicalName newName) {
758
        for (IBotanicalName existingName : homotypicNameList){
759 b9cdcc88 Andreas Müller
            if (existingName != newName){  //should not happen anymore, as new name is added later
760
                boolean onlyIfNotYetExists = true;
761
                createBasionymRelationIfPossible(state, existingName, newName, homonyms.contains(newName), onlyIfNotYetExists);
762
            }
763
        }
764
    }
765
766
    /**
767
     * @param state
768
     * @param name1
769
     * @param name2
770 fb3dec85 Andreas Müller
     * @return
771
     */
772 7d882578 Andreas Müller
    private void createBasionymRelationIfPossible(CubaImportState state, IBotanicalName name1,
773
            IBotanicalName name2,
774 b9cdcc88 Andreas Müller
            boolean name2isHomonym, boolean onlyIfNotYetExists) {
775 ea7deae0 Andreas Müller
        TaxonName basionymName = TaxonName.castAndDeproxy(name1);
776
        TaxonName newCombination = TaxonName.castAndDeproxy(name2);
777 b9cdcc88 Andreas Müller
        //exactly one name must have a basionym author
778
        if (name1.getBasionymAuthorship() == null && name2.getBasionymAuthorship() == null
779
                || name1.getBasionymAuthorship() != null && name2.getBasionymAuthorship() != null){
780
            return;
781
        }
782
783
        //switch order if necessary
784
        if (! name2isHomonym && basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
785 86536e03 Andreas Müller
            basionymName = TaxonName.castAndDeproxy(name2);
786
            newCombination = TaxonName.castAndDeproxy(name1);
787 b9cdcc88 Andreas Müller
        }
788
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())
789 6af76d56 Andreas Müller
                && BasionymRelationCreator.matchLastNamePart(basionymName, newCombination)){
790 b9cdcc88 Andreas Müller
            newCombination.addBasionym(basionymName);
791
        }else{
792
            if ( (newCombination.getBasionyms().isEmpty() || ! onlyIfNotYetExists)
793
                    && isLegitimate(basionymName)
794
                    && ! name2isHomonym){
795
                logger.info(state.getCurrentLine() + ": Names are potential basionyms but either author or name part do not match: " + basionymName.getTitleCache() + " <-> " + newCombination.getTitleCache());
796
            }
797 fb3dec85 Andreas Müller
        }
798 b9cdcc88 Andreas Müller
    }
799
800
    /**
801
     * @param basionymName
802
     * @return
803
     */
804 7d882578 Andreas Müller
    private boolean isLegitimate(IBotanicalName basionymName) {
805 b9cdcc88 Andreas Müller
        for (NomenclaturalStatus nomStatus : basionymName.getStatus()){
806
            if (nomStatus.getType()!= null && nomStatus.getType().isIllegitimateType()){
807
                    return false;
808
            }
809
        }
810
        for (NameRelationship nameRel : basionymName.getNameRelations()){
811
            if (nameRel.getType()!= null && nameRel.getType().isIllegitimateType()){
812
                    return false;
813
            }
814 fb3dec85 Andreas Müller
        }
815 b9cdcc88 Andreas Müller
        return true;
816
    }
817
818
819 fb3dec85 Andreas Müller
    /**
820
     * @param combinationAuthorship
821
     * @param basi
822
     * @return
823
     */
824
    private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
825
        if (author1 == null || author2 == null){
826
            return false;
827
        }else {
828
            return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
829
        }
830 ede5c502 Andreas Müller
    }
831
832
833
    /**
834
     * @param record
835
     * @param state
836
     * @param taxon
837
     */
838 fb3dec85 Andreas Müller
    private void makeNotes(HashMap<String, String> record, CubaImportState state) {
839 ede5c502 Andreas Müller
        String notesStr = getValue(record, "(Notas)");
840
        if (notesStr == null){
841
            return;
842
        }else{
843
            Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
844
            //TODO
845 0a6a64c9 Andreas Müller
            annotation.setAnnotationType(AnnotationType.TECHNICAL());
846 fb3dec85 Andreas Müller
            state.getCurrentTaxon().addAnnotation(annotation);
847 ede5c502 Andreas Müller
        }
848
    }
849
850
851
    /**
852
     * @param record
853
     * @param state
854
     * @param familyTaxon
855
     * @return
856
     */
857 fb3dec85 Andreas Müller
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
858 4b9c9c4b Andreas Müller
        String taxonStrOrig = getValue(record, "Taxón");
859
        if (taxonStrOrig == null){
860 fb3dec85 Andreas Müller
            return isSynonym ? state.getCurrentTaxon() : null;
861 ede5c502 Andreas Müller
        }
862 b9cdcc88 Andreas Müller
863 ede5c502 Andreas Müller
        boolean isAbsent = false;
864 4b9c9c4b Andreas Müller
        String taxonStr = taxonStrOrig;
865
        if (taxonStrOrig.startsWith("[") && taxonStrOrig.endsWith("]")){
866 ede5c502 Andreas Müller
            taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
867
            isAbsent = true;
868
        }
869
870 b9cdcc88 Andreas Müller
        boolean isAuct = false;
871
        if (taxonStr.endsWith("auct.")){
872
            isAuct = true;
873
            taxonStr.replace("auct.", "").trim();
874
        }
875
        state.setTaxonIsAbsent(isAbsent);
876 86536e03 Andreas Müller
        IBotanicalName botanicalName = makeName(state, taxonStr);
877 8422c0cd Andreas Müller
        Reference sec = getSecReference(state);
878 fb3dec85 Andreas Müller
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
879 b9cdcc88 Andreas Müller
        if (isAuct){
880
            taxon.setAppendedPhrase("auct.");
881
        }
882
883 fb3dec85 Andreas Müller
        TaxonNode higherNode;
884 ede5c502 Andreas Müller
        if (botanicalName.isProtectedTitleCache()){
885 4b9c9c4b Andreas Müller
            logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStrOrig);
886 fb3dec85 Andreas Müller
            higherNode = familyNode;
887
        }else{
888
            String genusStr = botanicalName.getGenusOrUninomial();
889
            Taxon genus = state.getHigherTaxon(genusStr);
890
            if (genus != null){
891
                higherNode = genus.getTaxonNodes().iterator().next();
892
            }else{
893 86536e03 Andreas Müller
                IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
894 b9cdcc88 Andreas Müller
                name.addSource(makeOriginalSource(state));
895 fb3dec85 Andreas Müller
                name.setGenusOrUninomial(genusStr);
896
                genus = Taxon.NewInstance(name, sec);
897 b9cdcc88 Andreas Müller
                genus.addSource(makeOriginalSource(state));
898 fb3dec85 Andreas Müller
                higherNode = familyNode.addChildTaxon(genus, null, null);
899
                state.putHigherTaxon(genusStr, genus);
900
            }
901 ede5c502 Andreas Müller
        }
902 f4682883 Andreas Müller
        taxon.addSource(makeOriginalSource(state));
903
904
        TaxonNode newNode = higherNode.addChildTaxon(taxon, null, null);
905 4b9c9c4b Andreas Müller
        if(isAbsent){
906
            botanicalName.setTitleCache(taxonStrOrig, true);
907 f4682883 Andreas Müller
            newNode.setExcluded(true);
908 4b9c9c4b Andreas Müller
        }
909 fb3dec85 Andreas Müller
910 ede5c502 Andreas Müller
        return taxon;
911
    }
912
913 c9f78619 Andreas Müller
    //Pattern for name strings with an orthographic variant: 2 words (capitalized word + lowercase word),
    //the variant of at least 3 lowercase letters written as (‘...’), an optional capitalized word in round
    //brackets and a final capitalized word, the latter 2 each with optional trailing dot.
    //Group 1 is the variant including its brackets and quotes.
    private final String orthVarRegExStr = "[A-Z][a-z]+\\s[a-z]+\\s(\\(‘([a-z]){3,}’\\))\\s(\\([A-Z][a-z]+\\.?\\)\\s)?[A-Z][a-zó]+\\.?";
914
    private final Pattern orthVarRegEx = Pattern.compile(orthVarRegExStr);
915
    /**
916
     * @param taxonStr
917
     * @return
918
     */
919 86536e03 Andreas Müller
    private IBotanicalName makeName(CubaImportState state, String nameStrOrig) {
920 c9f78619 Andreas Müller
        //normalize
921
        String nameStr = normalizeStatus(nameStrOrig);
922
        //orthVar
923
        Matcher orthVarMatcher = orthVarRegEx.matcher(nameStr);
924
        String orthVar = null;
925
        if (orthVarMatcher.matches()) {
926
            orthVar = orthVarMatcher.group(1);
927
            nameStr = nameStr.replace(" " + orthVar, "").trim().replaceAll("\\s{2,}", " ");
928
            orthVar = orthVar.substring(2, orthVar.length() - 2);
929 b9cdcc88 Andreas Müller
        }
930 c9f78619 Andreas Müller
931 b9cdcc88 Andreas Müller
        boolean isNomInval = false;
932
        if (nameStr.endsWith("nom. inval.")){
933
            isNomInval = true;
934
            nameStr = nameStr.replace("nom. inval.", "").trim();
935 c9f78619 Andreas Müller
        }
936 b9cdcc88 Andreas Müller
937 86536e03 Andreas Müller
        TaxonName result = (TaxonName)nameParser.parseReferencedName(nameStr, nc, Rank.SPECIES());
938 b9cdcc88 Andreas Müller
        result.addSource(makeOriginalSource(state));
939
        if (isNomInval){
940
            result.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.INVALID()));
941
        }
942 c9f78619 Andreas Müller
        if (orthVar != null){
943 86536e03 Andreas Müller
            TaxonName orthVarName = (TaxonName)result.clone();
944 b9cdcc88 Andreas Müller
            orthVarName.addSource(makeOriginalSource(state));
945 c9f78619 Andreas Müller
            //TODO
946 8422c0cd Andreas Müller
            Reference citation = null;
947 c9f78619 Andreas Müller
            orthVarName.addRelationshipToName(result, NameRelationshipType.ORTHOGRAPHIC_VARIANT(), citation, null, null);
948
            orthVarName.setSpecificEpithet(orthVar);
949
        }
950 b9cdcc88 Andreas Müller
        normalizeAuthors(result);
951 c9f78619 Andreas Müller
        return result;
952
953
    }
954
955 b9cdcc88 Andreas Müller
    /**
956
     * @param result
957
     */
958 7d882578 Andreas Müller
    private void normalizeAuthors(IBotanicalName result) {
959 b9cdcc88 Andreas Müller
        result.setCombinationAuthorship(normalizeAuthor(result.getCombinationAuthorship()));
960
        result.setExCombinationAuthorship(normalizeAuthor(result.getExCombinationAuthorship()));
961
        result.setExBasionymAuthorship(normalizeAuthor(result.getExBasionymAuthorship()));
962
        result.setBasionymAuthorship(normalizeAuthor(result.getBasionymAuthorship()));
963
964
    }
965
966
967
    /**
968
     * @param combinationAuthorship
969
     * @return
970
     */
971
    private TeamOrPersonBase<?> normalizeAuthor(TeamOrPersonBase<?> author) {
972
        if (author == null){
973
            return null;
974
        }
975
        TeamOrPersonBase<?> result;
976
        if (author.isInstanceOf(Person.class)){
977
            result = normalizePerson(CdmBase.deproxy(author, Person.class));
978
        }else{
979
            Team team = CdmBase.deproxy(author, Team.class);
980
            List<Person> list = team.getTeamMembers();
981
            for(int i = 0; i < list.size(); i++){
982
                Person person = list.get(i);
983
                Person tmpMember = normalizePerson(person);
984
                list.set(i, tmpMember);
985
            }
986
            return team;
987
        }
988
        return result;
989
    }
990
991
992
    /**
993
     * @param deproxy
994
     * @return
995
     */
996
    private Person normalizePerson(Person person) {
997
        String title = person.getNomenclaturalTitle();
998
        title = title.replaceAll("(?<=[a-zA-Z])\\.(?=[a-zA-Z])", ". ");
999
        person.setNomenclaturalTitle(title);
1000
        boolean isFilius = title.endsWith(" f.");
1001
        if (isFilius){
1002
            title.replace(" f.", "");
1003
        }
1004
1005
        String[] splits = title.split("\\s+");
1006
        int nNotFirstName = isFilius ? 2 : 1;
1007
        person.setLastname(splits[splits.length - nNotFirstName] + (isFilius? " f." : ""));
1008
        person.setFirstname(CdmUtils.concat(" ", Arrays.copyOfRange(splits, 0, splits.length-nNotFirstName)));
1009
        return person;
1010
    }
1011
1012
1013 fb3dec85 Andreas Müller
    /**
1014
     * @param state
1015
     * @return
1016
     */
1017 8422c0cd Andreas Müller
    private Reference getSecReference(CubaImportState state) {
1018
        Reference result = state.getSecReference();
1019 fb3dec85 Andreas Müller
        if (result == null){
1020
            result = ReferenceFactory.newDatabase();
1021
            result.setTitle("Flora of Cuba");
1022
            state.setSecReference(result);
1023
        }
1024
        return result;
1025
    }
1026
1027
1028
    //Status abbreviations which normalizeStatus separates from the name by a comma
    //if a name string ends with one of them.
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
1029 c9f78619 Andreas Müller
            "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub.", "nom. nud."};
1030 fb3dec85 Andreas Müller
    /**
1031
     * @param taxonStr
1032
     * @return
1033
     */
1034 c9f78619 Andreas Müller
    private String normalizeStatus(String nameStr) {
1035
        if (nameStr == null){
1036 fb3dec85 Andreas Müller
            return null;
1037
        }
1038 c9f78619 Andreas Müller
        String result = nameStr.replaceAll(HOMONYM_MARKER, "").trim();
1039 fb3dec85 Andreas Müller
        for (String nomStatusStr : nomStatusStrings){
1040
            nomStatusStr = " " + nomStatusStr;
1041 c9f78619 Andreas Müller
            if (result.endsWith(nomStatusStr)){
1042
                result = result.replace(nomStatusStr, "," + nomStatusStr);
1043 fb3dec85 Andreas Müller
            }
1044
        }
1045 c9f78619 Andreas Müller
        result = result.replaceAll(DOUBTFUL_MARKER, "").trim();
1046
        result = result.replace("[taxon]", "[infraspec.]");
1047
        return result;
1048 fb3dec85 Andreas Müller
1049
1050
    }
1051
1052 ede5c502 Andreas Müller
1053
    /**
1054
     * @param record
1055
     * @param state
1056
     * @return
1057
     */
1058
    private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
1059 c9f78619 Andreas Müller
        String familyStr = getValue(record, "Fam. default");
1060 ede5c502 Andreas Müller
        if (familyStr == null){
1061
            return null;
1062
        }
1063 b9cdcc88 Andreas Müller
        familyStr = familyStr.trim();
1064
        String alternativeFamilyStr = null;
1065
        if (familyStr.contains("/")){
1066
            String[] splits = familyStr.split("/");
1067
            if (splits.length > 2){
1068
                logger.warn(state.getCurrentLine() +": " + "More than 1 alternative name:" + familyStr);
1069
            }
1070
            familyStr = splits[0].trim();
1071
            alternativeFamilyStr = splits[1].trim();
1072
        }
1073
1074 ede5c502 Andreas Müller
        Taxon family = state.getHigherTaxon(familyStr);
1075
        TaxonNode familyNode;
1076
        if (family != null){
1077
            familyNode = family.getTaxonNodes().iterator().next();
1078
        }else{
1079 86536e03 Andreas Müller
            TaxonName name = (TaxonName)makeFamilyName(state, familyStr);
1080 8422c0cd Andreas Müller
            Reference sec = getSecReference(state);
1081 b9cdcc88 Andreas Müller
            family = Taxon.NewInstance(name, sec);
1082 fb3dec85 Andreas Müller
            ITaxonTreeNode rootNode = getClassification(state);
1083 b9cdcc88 Andreas Müller
            familyNode = rootNode.addChildTaxon(family, sec, null);
1084
            state.putHigherTaxon(familyStr, family);
1085
1086
        }
1087
1088
        if (isNotBlank(alternativeFamilyStr)){
1089
            NameRelationshipType type = NameRelationshipType.ALTERNATIVE_NAME();
1090 86536e03 Andreas Müller
            TaxonName alternativeName = (TaxonName)makeFamilyName(state, alternativeFamilyStr);
1091
            IBotanicalName familyName = family.getName();
1092 b9cdcc88 Andreas Müller
            boolean hasRelation = false;
1093
            for (NameRelationship nameRel : familyName.getRelationsToThisName()){
1094
                if (nameRel.getType().equals(type)){
1095
                    if (nameRel.getFromName().equals(alternativeName)){
1096
                        hasRelation = true;
1097
                    }
1098
                }
1099
            }
1100
            if (!hasRelation){
1101
                familyName.addRelationshipFromName(alternativeName, type, null);
1102
            }
1103
1104 ede5c502 Andreas Müller
        }
1105
1106
        return familyNode;
1107
    }
1108
1109 b9cdcc88 Andreas Müller
1110
    /**
1111
     * @param state
1112
     * @param taxon
1113
     */
1114
    private void validateTaxonIsAbsent(CubaImportState state, Taxon taxon) {
1115
        if (!state.isTaxonIsAbsent()){
1116
            return;
1117
        }
1118
1119
        for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
1120
            if (el instanceof Distribution){
1121
                Distribution dist = (Distribution)el;
1122
                NamedArea area = dist.getArea();
1123
                if (isCubanArea(area)){
1124
                    PresenceAbsenceTerm status = dist.getStatus();
1125
                    if (status != null && !status.isAbsenceTerm()){
1126
                        if (!isDoubtfulTerm(status)){
1127
                            String name = taxon.getName().getTitleCache();
1128
                            logger.error(state.getCurrentLine() +": Taxon ("+name+")is absent'[]' but has presence distribution: " + status.getTitleCache());
1129
                            return;
1130
                        }
1131
                    }
1132
                }
1133
            }
1134
        }
1135
    }
1136
1137
    /**
1138
     * @param state
1139
     * @param taxon
1140
     */
1141
    private void validateEndemic(CubaImportState state, Taxon taxon) {
1142
1143
        boolean hasExternalPresence = false;
1144
        for (DescriptionElementBase el : taxon.getDescriptions().iterator().next().getElements()){
1145
            if (el instanceof Distribution){
1146
                Distribution dist = (Distribution)el;
1147
                NamedArea area = dist.getArea();
1148
                if (!isCubanArea(area)){
1149
                    PresenceAbsenceTerm status = dist.getStatus();
1150
                    if (status != null && !status.isAbsenceTerm()){
1151
                        if (!isDoubtfulTerm(status)){
1152
                            hasExternalPresence = true;
1153
                            if (state.isEndemic()){
1154
                                String name = taxon.getName().getTitleCache();
1155
                                logger.error(state.getCurrentLine() +": Taxon ("+name+")is endemic but has non-cuban distribution: " + area.getIdInVocabulary() + "-" + status.getIdInVocabulary());
1156
                                return;
1157
                            }
1158
                        }
1159
                    }
1160
                }
1161
            }
1162
        }
1163
        if (!state.isEndemic() && ! hasExternalPresence){
1164
            String name = taxon.getName().getTitleCache();
1165
            logger.error(state.getCurrentLine() +": Taxon ("+name+")is not endemic but has no non-cuban distribution" );
1166
        }
1167
    }
1168
1169
1170 c9f78619 Andreas Müller
    /**
1171
     * @param state
1172
     * @param taxon
1173
     * @param famStr
1174
     * @param famRef
1175
     * @return
1176
     */
1177 8422c0cd Andreas Müller
    private Taxon makeAlternativeFamilyTaxon(CubaImportState state, String famStr, Reference famRef) {
1178 c9f78619 Andreas Müller
        String key = famRef.getTitle() + ":"+ famStr;
1179
        Taxon family = state.getHigherTaxon(key);
1180
        if (family == null){
1181 86536e03 Andreas Müller
            IBotanicalName name = makeFamilyName(state, famStr);
1182 c9f78619 Andreas Müller
            family = Taxon.NewInstance(name, famRef);
1183
            state.putHigherTaxon(key, family);
1184
        }
1185
1186
        return family;
1187
    }
1188
1189 ede5c502 Andreas Müller
1190 b9cdcc88 Andreas Müller
    /**
1191
     * @param state
1192
     * @param famStr
1193
     * @return
1194
     */
1195 86536e03 Andreas Müller
    private IBotanicalName makeFamilyName(CubaImportState state, String famStr) {
1196
        IBotanicalName name = state.getFamilyName(famStr);
1197 b9cdcc88 Andreas Müller
        if (name == null){
1198 ded3de15 Andreas Müller
            name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
1199 b9cdcc88 Andreas Müller
            name.setGenusOrUninomial(famStr);
1200
            state.putFamilyName(famStr, name);
1201
            name.addSource(makeOriginalSource(state));
1202
        }
1203
        return name;
1204
    }
1205
1206
1207 ede5c502 Andreas Müller
    /**
1208
     * @param state
1209
     * @return
1210
     */
1211 fb3dec85 Andreas Müller
    private TaxonNode getClassification(CubaImportState state) {
1212 ede5c502 Andreas Müller
        Classification classification = state.getClassification();
1213
        if (classification == null){
1214 fb3dec85 Andreas Müller
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
1215
        }
1216
        TaxonNode rootNode = state.getRootNode();
1217
        if (rootNode == null){
1218
            rootNode = getTaxonNodeService().find(plantaeUuid);
1219
        }
1220
        if (rootNode == null){
1221 8422c0cd Andreas Müller
            Reference sec = getSecReference(state);
1222 fb3dec85 Andreas Müller
            if (classification == null){
1223
                String classificationName = state.getConfig().getClassificationName();
1224
                //TODO
1225
                Language language = Language.DEFAULT();
1226
                classification = Classification.NewInstance(classificationName, sec, language);
1227
                state.setClassification(classification);
1228
                classification.setUuid(state.getConfig().getClassificationUuid());
1229
                classification.getRootNode().setUuid(rootUuid);
1230
            }
1231
1232 86536e03 Andreas Müller
            IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
1233 fb3dec85 Andreas Müller
            plantaeName.setGenusOrUninomial("Plantae");
1234
            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
1235
            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
1236
            plantaeNode.setUuid(plantaeUuid);
1237
            state.setRootNode(plantaeNode);
1238 ede5c502 Andreas Müller
            getClassificationService().save(classification);
1239 fb3dec85 Andreas Müller
1240
            rootNode = plantaeNode;
1241 ede5c502 Andreas Müller
        }
1242 fb3dec85 Andreas Müller
        return rootNode;
1243 ede5c502 Andreas Müller
    }
1244
1245
1246
    /**
1247
     * @param record
1248
     * @param originalKey
1249
     * @return
1250
     */
1251
    private String getValue(HashMap<String, String> record, String originalKey) {
1252
        String value = record.get(originalKey);
1253
        if (! StringUtils.isBlank(value)) {
1254
        	if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
1255
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
1256
        	return value;
1257
        }else{
1258
        	return null;
1259
        }
1260
    }
1261
1262
1263
1264
	/**
1265
	 *  Stores taxa records in DB
1266
	 */
1267
	@Override
1268
    protected void firstPass(CubaImportState state) {
1269 b9cdcc88 Andreas Müller
	    boolean isSynonymOnly = false;
1270 ede5c502 Andreas Müller
1271 f5c05984 Andreas Müller
        String line = state.getCurrentLine() + ": ";
1272 fb3dec85 Andreas Müller
        HashMap<String, String> record = state.getOriginalRecord();
1273 ede5c502 Andreas Müller
1274 fb3dec85 Andreas Müller
        Set<String> keys = record.keySet();
1275
        for (String key: keys) {
1276
            if (! expectedKeys.contains(key)){
1277 f5c05984 Andreas Müller
                logger.warn(line + "Unexpected Key: " + key);
1278 fb3dec85 Andreas Müller
            }
1279
        }
1280 ede5c502 Andreas Müller
1281 c9f78619 Andreas Müller
        if (record.get("Fam. default") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
1282 fb3dec85 Andreas Müller
            //second header line, don't handle
1283
            return;
1284
        }
1285 ede5c502 Andreas Müller
1286 fb3dec85 Andreas Müller
        //Fam.
1287
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
1288
        if (familyTaxon == null){
1289
            if (record.get("Taxón") != null){
1290 f5c05984 Andreas Müller
                logger.warn(line + "Family not recognized but taxon exists: " + record.get("Taxón"));
1291 fb3dec85 Andreas Müller
                return;
1292
            }else if (record.get("Syn.") == null){
1293 f5c05984 Andreas Müller
                logger.warn(line + "Family not recognized but also no synonym exists");
1294 fb3dec85 Andreas Müller
                return;
1295
            }else{
1296 b9cdcc88 Andreas Müller
                isSynonymOnly = true;
1297 fb3dec85 Andreas Müller
            }
1298
        }
1299 ede5c502 Andreas Müller
1300 b9cdcc88 Andreas Müller
       //Taxón
1301
        Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonymOnly);
1302
        if (taxon == null && ! isSynonymOnly){
1303 f5c05984 Andreas Müller
            logger.warn(line + "taxon could not be created and is null");
1304 fb3dec85 Andreas Müller
            return;
1305
        }
1306
        state.setCurrentTaxon(taxon);
1307 ede5c502 Andreas Müller
1308 c9f78619 Andreas Müller
        //Fam. ALT
1309 4b9c9c4b Andreas Müller
        if (!isSynonymOnly){
1310
            makeAlternativeFamilies(record, state, familyTaxon, taxon);
1311
        }
1312 c9f78619 Andreas Müller
1313 fb3dec85 Andreas Müller
        //(Notas)
1314
        makeNotes(record, state);
1315 ede5c502 Andreas Müller
1316 fb3dec85 Andreas Müller
        //Syn.
1317 b9cdcc88 Andreas Müller
        makeSynonyms(record, state, !isSynonymOnly);
1318 ede5c502 Andreas Müller
1319 fb3dec85 Andreas Müller
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
1320
        makeCubanDistribution(record, state);
1321 ede5c502 Andreas Müller
1322
1323 0a6a64c9 Andreas Müller
//        "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1324 fb3dec85 Andreas Müller
//        "CuC","VC","Ci","SS","CA","Cam","LT",
1325
//        "CuE","Gr","Ho","SC","Gu",
1326
        makeProvincesDistribution(record, state);
1327 ede5c502 Andreas Müller
1328 c9f78619 Andreas Müller
//      "Esp","Ja","PR","Men","Bah","Cay",
1329
//      "AmN","AmC","AmS","VM"});
1330
        makeOtherAreasDistribution(record, state);
1331 0a6a64c9 Andreas Müller
1332 b9cdcc88 Andreas Müller
        validateTaxonIsAbsent(state, taxon);
1333
        if (!isSynonymOnly){
1334
            validateEndemic(state, taxon);
1335
        }
1336 0a6a64c9 Andreas Müller
1337
        state.setHighestStatusForTaxon(null);
1338
1339 fb3dec85 Andreas Müller
		return;
1340
    }
1341 ede5c502 Andreas Müller
1342
1343 b9cdcc88 Andreas Müller
    /**
1344
     * @param state
1345
     * @return
1346
     */
1347
    private IdentifiableSource makeOriginalSource(CubaImportState state) {
1348
        return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1349
    }
1350
    /**
1351
     * @param state
1352
     * @return
1353
     */
1354
    private DescriptionElementSource makeDescriptionSource(CubaImportState state) {
1355
        return DescriptionElementSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1356
    }
1357
1358
    private static Set<UUID> doubtfulStatus = new HashSet<>();
1359 ede5c502 Andreas Müller
1360 b9cdcc88 Andreas Müller
    /**
1361
     * @param status
1362
     * @return
1363
     */
1364
    private boolean isDoubtfulTerm(PresenceAbsenceTerm status) {
1365
        if (doubtfulStatus.isEmpty()){
1366
            doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyNaturalisedUuid);
1367
            doubtfulStatus.add(CubaTransformer.doubtfulIndigenousDoubtfulUuid);
1368
            doubtfulStatus.add(CubaTransformer.endemicDoubtfullyPresentUuid);
1369
            doubtfulStatus.add(CubaTransformer.naturalisedDoubtfullyPresentUuid);
1370
            doubtfulStatus.add(CubaTransformer.nonNativeDoubtfullyPresentUuid);
1371
            doubtfulStatus.add(CubaTransformer.occasionallyCultivatedUuid);
1372
            doubtfulStatus.add(CubaTransformer.rareCasualUuid);
1373
            doubtfulStatus.add(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE().getUuid());
1374
            doubtfulStatus.add(PresenceAbsenceTerm.CULTIVATED_PRESENCE_QUESTIONABLE().getUuid());
1375
        }
1376
        boolean isDoubtful = doubtfulStatus.contains(status.getUuid());
1377
        return isDoubtful;
1378
    }
1379
1380
1381
    /**
1382
     * @param area
1383
     * @return
1384
     */
1385
    private boolean isCubanArea(NamedArea area) {
1386
        if (area.getUuid().equals(CubaTransformer.uuidCuba)){
1387
            return true;
1388
        }else if (area.getPartOf()!= null){
1389
            return isCubanArea(area.getPartOf());
1390
        }else{
1391
            return false;
1392
        }
1393
    }
1394
1395
1396
    /**
1397 fb3dec85 Andreas Müller
     * @param record
1398
     * @param state
1399 c9f78619 Andreas Müller
     * @param familyTaxon
1400
     * @param taxon
1401
     */
1402
    private void makeAlternativeFamilies(HashMap<String, String> record,
1403
            CubaImportState state,
1404
            TaxonNode familyTaxon,
1405
            Taxon taxon) {
1406
1407
        String famFRC = record.get("Fam. FRC");
1408
        String famAS = record.get("Fam. A&S");
1409
        String famFC = record.get("Fam. FC");
1410
1411 8422c0cd Andreas Müller
        Reference refFRC = makeReference(state, CubaTransformer.uuidRefFRC);
1412
        Reference refAS = makeReference(state, CubaTransformer.uuidRefAS);
1413
        Reference refFC = makeReference(state, CubaTransformer.uuidRefFC);
1414 c9f78619 Andreas Müller
1415
        makeSingleAlternativeFamily(state, taxon, famFRC, refFRC);
1416
        makeSingleAlternativeFamily(state, taxon, famAS, refAS);
1417
        makeSingleAlternativeFamily(state, taxon, famFC, refFC);
1418
    }
1419
1420
1421
    /**
1422
     * @param state
1423
     * @param uuidreffrc
1424
     * @return
1425
     */
1426 8422c0cd Andreas Müller
    private Reference makeReference(CubaImportState state, UUID uuidRef) {
1427
        Reference ref = state.getReference(uuidRef);
1428 c9f78619 Andreas Müller
        if (ref == null){
1429
            ref = getReferenceService().find(uuidRef);
1430
            state.putReference(uuidRef, ref);
1431
        }
1432
        return ref;
1433
    }
1434
1435
1436
    /**
1437
     * @param state
1438
     * @param taxon
1439
     * @param famString
1440
     * @param famRef
1441
     */
1442 8422c0cd Andreas Müller
    private void makeSingleAlternativeFamily(CubaImportState state, Taxon taxon, String famStr, Reference famRef) {
1443 c9f78619 Andreas Müller
        if (isBlank(famStr)){
1444 4b9c9c4b Andreas Müller
            famStr = "-";
1445
//            return;
1446 c9f78619 Andreas Müller
        }
1447
1448
        TaxonDescription desc = getTaxonDescription(taxon, false, true);
1449
1450
        UUID altFamUuid1;
1451
        UUID altFamUuid2;
1452
        try {
1453
            altFamUuid1 = state.getTransformer().getFeatureUuid("Alt.Fam.");
1454
            altFamUuid2 = state.getTransformer().getFeatureUuid("Alt.Fam.2");
1455
        } catch (UndefinedTransformerMethodException e) {
1456
            throw new RuntimeException(e);
1457
        }
1458
1459
1460
        Taxon famTaxon = makeAlternativeFamilyTaxon(state, famStr, famRef);
1461
1462
1463
        //TextData
1464 4b9c9c4b Andreas Müller
        Feature feature1 = getFeature(state, altFamUuid1, "Families in other Floras (Text)", "Families in other Floras (Text)", "Other floras", null);
1465
        feature1.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
1466 c9f78619 Andreas Müller
//        TextData textData = TextData.NewInstance(feature1, famStr, Language.DEFAULT(), null);
1467
        TextData textData = TextData.NewInstance(feature1, null, Language.DEFAULT(), null);
1468
        textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null, famTaxon.getName(),null);
1469
        desc.addElement(textData);
1470
1471
1472
1473
        //TaxonInteraction
1474 4b9c9c4b Andreas Müller
        Feature feature2 = getFeature(state, altFamUuid2, "Families in other Floras", "Families in other Floras", "Other floras(2)", null);
1475 c9f78619 Andreas Müller
        feature2.setSupportsTaxonInteraction(true);
1476 4b9c9c4b Andreas Müller
        feature2.addRepresentation(Representation.NewInstance("Familias en otras Floras", "Familias en otras Floras", null, Language.SPANISH_CASTILIAN()));
1477 c9f78619 Andreas Müller
        TaxonInteraction taxInteract = TaxonInteraction.NewInstance(feature2);
1478 4b9c9c4b Andreas Müller
        textData.putText(Language.SPANISH_CASTILIAN(), "Familias en otras Floras");
1479 c9f78619 Andreas Müller
        taxInteract.setTaxon2(famTaxon);
1480
        taxInteract.addSource(OriginalSourceType.PrimaryTaxonomicSource, null,null, famRef, null);
1481
        desc.addElement(taxInteract);
1482
1483
        //Concept Relation
1484
        famTaxon.addTaxonRelation(taxon, TaxonRelationshipType.INCLUDES(), taxon.getSec(), null);
1485
1486
    }
1487
1488
1489
1490
1491
1492
    /**
1493
     * @param record
1494
     * @param state
1495 fb3dec85 Andreas Müller
     * @param taxon
1496
     */
1497
    // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1498
//  "CuC","VC","Ci","SS","CA","Cam","LT",
1499
//  "CuE","Gr","Ho","SC","Gu",
1500
    private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
1501
        List<String> areaKeys = Arrays.asList(new String[]{
1502
                "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
1503
                "CuC","VC","Ci","SS","CA","Cam","LT",
1504
                "CuE","Gr","Ho","SC","Gu",
1505 0a6a64c9 Andreas Müller
                });
1506
        for (String areaKey : areaKeys){
1507
            state.setCubanProvince(true);
1508
            makeSingleProvinceDistribution(areaKey, record, state);
1509
        }
1510
    }
1511
1512
    private void makeOtherAreasDistribution(HashMap<String, String> record, CubaImportState state) {
1513
        List<String> areaKeys = Arrays.asList(new String[]{
1514 fb3dec85 Andreas Müller
                "Esp","Ja","PR","Men","Bah","Cay",
1515
                "AmN","AmC","AmS","VM"});
1516
        for (String areaKey : areaKeys){
1517 0a6a64c9 Andreas Müller
            state.setCubanProvince(false);
1518 fb3dec85 Andreas Müller
            makeSingleProvinceDistribution(areaKey, record, state);
1519
        }
1520
    }
1521 ede5c502 Andreas Müller
1522 fb3dec85 Andreas Müller
1523 ede5c502 Andreas Müller
1524
1525 fb3dec85 Andreas Müller
    /**
1526
     * @param areaKey
1527
     * @param record
1528
     * @param state
1529 0a6a64c9 Andreas Müller
     * @param highestStatus
1530 fb3dec85 Andreas Müller
     * @return
1531
     * @throws UndefinedTransformerMethodException
1532
     */
1533 0a6a64c9 Andreas Müller
    private PresenceAbsenceTerm makeProvinceStatus(String areaKey,
1534
            HashMap<String, String> record,
1535
            CubaImportState state) throws UndefinedTransformerMethodException {
1536
1537 fb3dec85 Andreas Müller
        String statusStr = record.get(areaKey);
1538
        if (statusStr == null){
1539
            return null;
1540 b9cdcc88 Andreas Müller
        }else{
1541
            statusStr = statusStr.trim();
1542 fb3dec85 Andreas Müller
        }
1543
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
1544
        if (status == null){
1545 b9cdcc88 Andreas Müller
//            PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
1546 0a6a64c9 Andreas Müller
            if (state.isCubanProvince() && isMinus(statusStr)){
1547 b9cdcc88 Andreas Müller
//                getAbsenceTermForStatus(state, highestStatus);
1548
                //we now handle cuban provinces same as external regions
1549
                status = state.getTransformer().getPresenceTermByKey("--");
1550 0a6a64c9 Andreas Müller
            }else if (! state.isCubanProvince() && isMinus(statusStr)){
1551 c9f78619 Andreas Müller
                status = state.getTransformer().getPresenceTermByKey("--");
1552 0a6a64c9 Andreas Müller
            }else{
1553 b9cdcc88 Andreas Müller
//                logger.warn("Unhandled status str for provinces / external regions: " + statusStr);
1554 0a6a64c9 Andreas Müller
                UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
1555 b9cdcc88 Andreas Müller
                if (statusUuid == null){
1556
                    logger.error(state.getCurrentLine() + ": Undefined status str for provinces / external regions. No UUID given: '" + statusStr + "'");
1557
                }else{
1558
                    status = getPresenceTerm(state, statusUuid, statusStr, statusStr, statusStr, false);
1559
                }
1560 0a6a64c9 Andreas Müller
            }
1561
        }
1562
1563
        return status;
1564
    }
1565
1566
1567
    /**
1568
     * @param highestStatus
1569
     * @throws UndefinedTransformerMethodException
1570
     */
1571
    private PresenceAbsenceTerm getAbsenceTermForStatus(CubaImportState state, PresenceAbsenceTerm highestStatus) throws UndefinedTransformerMethodException {
1572
        if (highestStatus == null){
1573
            logger.warn(state.getCurrentLine() + ": Highest status not defined");
1574
            return null;
1575
        }
1576
        PresenceAbsenceTerm result = null;
1577
        if (highestStatus.equals(getStatus(state, "E"))){
1578
            result = getStatus(state, "-E");
1579
        }else if (highestStatus.getUuid().equals(state.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus.equals(PresenceAbsenceTerm.NATIVE())){
1580
            result = getStatus(state, "-Ind.");
1581
        }else if (highestStatus.equals(getStatus(state, "Ind.?"))){
1582
            result = getStatus(state, "-Ind.?");  //TODO
1583
        }else if (highestStatus.equals(getStatus(state, "N"))){
1584
            result = getStatus(state, "-N");
1585
        }else if (highestStatus.equals(getStatus(state, "P"))){
1586
            result = getStatus(state, "-P");
1587
        }else if (highestStatus.equals(getStatus(state, "A"))){
1588
            result = getStatus(state, "-A");
1589
        }else if (highestStatus.equals(getStatus(state, "C"))){
1590
            result = getStatus(state, "-C");
1591
        }
1592
        logger.warn(state.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus.getTitleCache());
1593
        return result;
1594
    }
1595
1596
1597
    /**
1598
     * @param string
1599
     * @return
1600
     * @throws UndefinedTransformerMethodException
1601
     */
1602
    private PresenceAbsenceTerm getStatus(CubaImportState state, String key) throws UndefinedTransformerMethodException {
1603
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(key);
1604
        if (status == null){
1605
            UUID statusUuid = state.getTransformer().getPresenceTermUuid(key);
1606 fb3dec85 Andreas Müller
            status = getPresenceTerm(state, statusUuid, null, null, null, false);
1607
        }
1608
        return status;
1609
    }
1610 ede5c502 Andreas Müller
1611
1612 fb3dec85 Andreas Müller
    /**
1613 ede5c502 Andreas Müller
	 *  Stores parent-child, synonym and common name relationships
1614
	 */
1615
	@Override
1616
    protected void secondPass(CubaImportState state) {
1617
//		CyprusRow cyprusRow = state.getCyprusRow();
1618
		return;
1619
	}
1620
1621
1622
    @Override
1623
    protected boolean isIgnore(CubaImportState state) {
1624
        return ! state.getConfig().isDoTaxa();
1625
    }
1626
1627
    @Override
1628
    protected boolean doCheck(CubaImportState state) {
1629
        logger.warn("DoCheck not yet implemented for CubaExcelImport");
1630
        return true;
1631
    }
1632
1633
}