Project

General

Profile

Download (38.6 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.cuba;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Set;
17
import java.util.UUID;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28
import eu.etaxonomy.cdm.model.agent.Team;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.Annotation;
31
import eu.etaxonomy.cdm.model.common.AnnotationType;
32
import eu.etaxonomy.cdm.model.common.Language;
33
import eu.etaxonomy.cdm.model.description.Distribution;
34
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
36
import eu.etaxonomy.cdm.model.location.NamedArea;
37
import eu.etaxonomy.cdm.model.name.BotanicalName;
38
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
39
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.reference.Reference;
44
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
45
import eu.etaxonomy.cdm.model.taxon.Classification;
46
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
47
import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
48
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
52
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
53

    
54
/**
55
 * @author a.mueller
56
 * @created 05.01.2016
57
 */
58

    
59
@Component
60
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
61
    private static final long serialVersionUID = -747486709409732371L;
62
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
63

    
64
    private static final String HOMONYM_MARKER = "\\s+homon.?$";
65
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
66

    
67

    
68
    private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
69
    private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
70

    
71
    private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
72
    private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
73

    
74
    private  static List<String> expectedKeys= Arrays.asList(new String[]{"Fam.","(Fam.)","Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
75

    
76
	@Override
77
    protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
78
	    //we do everything in firstPass here
79
    	return;
80
    }
81

    
82

    
83
    /**
84
     * @param record
85
     * @param state
86
     * @param taxon
87
     */
88
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
89
        try {
90
            NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
91
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
92
            List<PresenceAbsenceTerm> statuss =  makeCubanStatuss(record, state);
93
            for (PresenceAbsenceTerm status : statuss){
94
                Distribution distribution = Distribution.NewInstance(cuba, status);
95
                desc.addElement(distribution);
96
            }
97
        } catch (UndefinedTransformerMethodException e) {
98
            e.printStackTrace();
99
        }
100
    }
101

    
102

    
103
    /**
104
     * @param record
105
     * @param state
106
     * @return
107
     * @throws UndefinedTransformerMethodException
108
     */
109
    private List<PresenceAbsenceTerm> makeCubanStatuss(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
110
        boolean isAbsent = false;  //TODO
111
        PresenceAbsenceTerm highestStatus = null;
112

    
113
        String line = state.getCurrentLine() + ": ";
114
        List<PresenceAbsenceTerm> result = new ArrayList<>();
115

    
116
        String endemicStr = getValue(record, "End");
117
        String indigenousStr = getValue(record, "Ind");
118
        String indigenousDoubtStr = getValue(record, "Ind? D");
119
        String naturalisedStr = getValue(record, "Nat");
120
        String dudStr = getValue(record, "Dud P");
121
        String advStr = getValue(record, "Adv");
122
        String cultStr = getValue(record, "Cult C");
123

    
124
        if (endemicStr != null){
125
            if(endemicStr.equals("+")){
126
                PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
127
                result.add(endemicState);
128
                highestStatus = endemicState;
129
            }else if(isMinus(endemicStr)){
130
                UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
131
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
132
                result.add(endemicState);
133
                checkAbsentHighestState(highestStatus, line, "endemic", false);
134
            }else{
135
                logger.warn(line + "Endemic not recognized: " + endemicStr);
136
            }
137
        }
138
        if (indigenousStr != null){
139
            if(indigenousStr.equals("+")){
140
                UUID indigenousUuid = state.getTransformer().getPresenceTermUuid("Ind.");
141
                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
142
                result.add(indigenousState);
143
                highestStatus = highestStatus != null ? highestStatus : indigenousState;
144
            }else if(isMinus(indigenousStr)){
145
                PresenceAbsenceTerm indigenousState = state.getTransformer().getPresenceTermByKey("-Ind.");
146
                result.add(indigenousState);
147
                checkAbsentHighestState(highestStatus, line, "indigenous", false);
148
            }else if(indigenousStr.equals("?")){
149
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("?Ind.");
150
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
151
                result.add(indigenousDoubtState);
152
                checkAbsentHighestState(highestStatus, line, "indigenous", true);
153
            }else{
154
                logger.warn(line + "Indigenous not recognized: " + indigenousStr);
155
            }
156
        }
157
        if(indigenousDoubtStr != null){
158
            if(indigenousDoubtStr.equals("D")){
159
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("Ind.?");
160
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
161
                result.add(indigenousDoubtState);
162
                highestStatus = highestStatus != null ? highestStatus : indigenousDoubtState;
163
            }else{
164
                logger.warn(line + "Indigenous doubtful not recognized: " + indigenousDoubtStr);
165
            }
166
        }
167
        if(naturalisedStr != null){
168
            if(naturalisedStr.equals("N")){
169
                PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
170
                result.add(haturalizedState);
171
                highestStatus = highestStatus != null ? highestStatus : haturalizedState;
172
            }else if(isMinus(naturalisedStr)){
173
                UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
174
                PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
175
                result.add(naturalisedErrorState);
176
                checkAbsentHighestState(highestStatus, line, "naturalized", false);
177
            }else if(naturalisedStr.equals("?")){
178
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
179
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
180
                result.add(naturalisedDoubtState);
181
                checkAbsentHighestState(highestStatus, line, "naturalized", true);
182
            }else{
183
                logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
184
            }
185
        }
186
        if(dudStr != null){
187
            if(dudStr.equals("P")){
188
                UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
189
                PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
190
                result.add(dudState);
191
                highestStatus = highestStatus != null ? highestStatus : dudState;
192
            }else if(isMinus(dudStr)){
193
                UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
194
                PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
195
                result.add(nonNativeErrorState);
196
                checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", false);
197
            }else if(dudStr.equals("?")){
198
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
199
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
200
                result.add(naturalisedDoubtState);
201
                checkAbsentHighestState(highestStatus, line, "non-native and doubtfully naturalised", true);
202
            }else{
203
                logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
204
            }
205
        }
206
        if(advStr != null){
207
            if(advStr.equals("A")){
208
                UUID advUuid = state.getTransformer().getPresenceTermUuid("Adv.");
209
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
210
                result.add(advState);
211
                highestStatus = highestStatus != null ? highestStatus : advState;
212
            }else if(isMinus(advStr)){
213
                UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
214
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
215
                result.add(advState);
216
                checkAbsentHighestState(highestStatus, line, "adventive", false);
217
            }else{
218
                logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
219
            }
220
        }else if(cultStr != null){
221
            if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
222
                logger.warn("'cultivated' not recognized: " + cultStr);
223
            }else if(cultStr.equals("C")){
224
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
225
                result.add(cultivatedState);
226
                highestStatus = highestStatus != null ? highestStatus : cultivatedState;
227
            }else if(cultStr.equals("?")){
228
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
229
                result.add(cultivatedState);
230
                checkAbsentHighestState(highestStatus, line, "cultivated", true);
231
            }else if(cultStr.equals("(C)")){
232
                UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
233
                PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
234
                result.add(cultivatedState);
235
            }else if(isMinus(cultStr)){
236
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
237
                result.add(cultivatedState);
238
                checkAbsentHighestState(highestStatus, line, "cultivated", false);
239
            }else{
240
                logger.warn(line + "'cultivated' not recognized: " + cultStr);
241
            }
242
        }
243
        state.setHighestStatusForTaxon(highestStatus);
244
        return result;
245
    }
246

    
247

    
248
    /**
249
     * @param highestStatus
250
     * @param line
251
     */
252
    private void checkAbsentHighestState(PresenceAbsenceTerm highestStatus, String line, String stateLabel, boolean doubtful) {
253
        if (highestStatus == null){
254
            String absentStr = doubtful ? "doubtful" : "absent";
255
            logger.warn(line + "Highest cuban state is " + absentStr + " " + stateLabel);
256
        }
257

    
258
    }
259

    
260

    
261
    /**
262
     * @param indigenousStr
263
     * @return
264
     */
265
    private boolean isMinus(String str) {
266
        return str.equals("-") || str.equals("–");
267
    }
268

    
269

    
270
    /**
271
     * @param indigenousStr
272
     * @return
273
     */
274
    private boolean checkPlusMinusDoubt(String str) {
275
        return str.equals("+") || isMinus(str)|| str.equals("?");
276
    }
277

    
278

    
279
    /**
280
     * @param indigenousStr
281
     * @param indigenousDoubtStr
282
     * @param naturalisedStr
283
     * @param dudStr
284
     * @param advStr
285
     * @param cultStr
286
     */
287
    private boolean checkAllNull(String ... others) {
288
        for (String other : others){
289
            if (other != null){
290
                return false;
291
            }
292
        }
293
        return true;
294
    }
295

    
296

    
297
    private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
298
//    String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
299
//                    + "(\\((.{6,})\\))?";
300
    private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
301
                                                     +"(\\((.{6,})\\))?";
302
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
303
            +"(\\((.{6,})\\))?";
304
    private static final String auctRegExStr = "auct\\."
305
            +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sBritton|\\sGriseb\\.|\\sWright|\\sFRC|\\sCoL|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
306
    private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
307
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
308
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
309

    
310
    private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
311
    private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
312
    private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
313
    private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
314

    
315
    /**
316
     * @param record
317
     * @param state
318
     * @param taxon
319
     */
320
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state) {
321
//        boolean forAccepted = true;
322
        String synonymStr = record.get("Syn.");
323
        String line = state.getCurrentLine() + ": ";
324

    
325
        if (synonymStr == null){
326
            //TODO test that this is not a synonym only line
327
            return;
328
        }
329
        synonymStr = synonymStr.trim();
330

    
331
//        String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
332
//        String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
333

    
334
//        Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
335

    
336
        Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
337
        Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
338
        Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
339
        Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
340

    
341
        List<BotanicalName> homonyms = new ArrayList<>();
342
        if (missapliedMatcher.matches()){
343
            boolean doubtful = missapliedMatcher.group(1) != null;
344
            String firstPart = missapliedMatcher.group(2);
345
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
346

    
347
            String secondPart = missapliedMatcher.group(3);
348
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
349
            misappliedNameTaxon.setDoubtful(doubtful);
350
            if (secondPart.startsWith("sensu")){
351
                secondPart = secondPart.substring(5).trim();
352
                if (secondPart.contains(" ")){
353
                    logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
354
                }
355
                Reference<?> sensu = ReferenceFactory.newGeneric();
356
                Team team = Team.NewTitledInstance(secondPart, null);
357
                sensu.setAuthorship(team);
358
                misappliedNameTaxon.setSec(sensu);
359
            }else if (secondPart.matches(auctRegExStr)){
360
                secondPart = secondPart.replace("p. p.", "p.p.");
361
                misappliedNameTaxon.setAppendedPhrase(secondPart);
362
            }else{
363
                logger.warn(line + "Misapplied second part not recognized: " + secondPart);
364
            }
365
            //TODO
366
            Reference<?> relRef = null;
367
            state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
368
        }else if (nomInvalMatcher.matches()){
369
            String firstPart = nomInvalMatcher.group(1);
370
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
371
            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
372
            name.addStatus(status);
373
            state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
374
        }else if (acceptedMatcher.matches()){
375
            String firstPart = acceptedMatcher.group(1);
376
            String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
377
            handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
378
        }else if(heterotypicMatcher.matches()){
379
            String firstPart = heterotypicMatcher.group(1).trim();
380
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
381
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
382
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
383
            boolean isHomonym = firstPart.trim().matches(".*" + HOMONYM_MARKER);
384
            firstPart = normalizeStatus(firstPart);
385
            BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
386
            if (synName.isProtectedTitleCache()){
387
                logger.warn(line + "heterotypic base synonym could not be parsed correctly:" + firstPart);
388
            }
389
            if (isHomonym){
390
                homonyms.add(synName);
391
            }
392
            SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
393
            sr.getSynonym().setDoubtful(isDoubtful);
394
            handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
395
        }else{
396
            logger.warn(line + "Synonym entry does not match: " + synonymStr);
397
        }
398
    }
399

    
400

    
401

    
402
    /**
403
     * @param synonymStr
404
     * @param state
405
     * @param homonyms
406
     * @param homonymPart
407
     * @param isDoubtful
408
     * @param taxon
409
     * @param homotypicalGroup
410
     */
411
    private void handleHomotypicGroup(String homotypicStr,
412
            CubaImportState state,
413
            BotanicalName homotypicName,
414
            boolean isHeterotypic,
415
            List<BotanicalName> homonyms,
416
            String homonymPart,
417
            boolean isDoubtful) {
418

    
419
        if (homotypicStr == null){
420
            return;
421
        }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
422
            homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
423
        }
424

    
425
        BotanicalName currentBasionym = homotypicName;
426
        String[] splits = homotypicStr.split("\\s*,\\s*");
427
        for (String split : splits){
428
            boolean isHomonym = split.trim().matches(".*" + HOMONYM_MARKER);
429
            String singleName = normalizeStatus(split);
430
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
431
            if (newName.isProtectedTitleCache()){
432
                logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
433
            }
434
            if (isHomonym){
435
                homonyms.add(newName);
436
            }
437
            if (isHeterotypic){
438
                SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicName.getHomotypicalGroup(), null, null);
439
                sr.getSynonym().setDoubtful(isDoubtful);
440
//                newName.addBasionym(homotypicName);
441
                currentBasionym = handleBasionym(currentBasionym, newName);
442
            }else{
443
                state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
444
                handleBasionym(currentBasionym, newName);
445
            }
446
        }
447
        makeHomonyms(homonyms, homonymPart, state);
448
    }
449

    
450

    
451
    /**
452
     * @param homonyms
453
     * @param homonymPart
454
     * @param state
455
     */
456
    private void makeHomonyms(List<BotanicalName> homonyms, String homonymPart, CubaImportState state) {
457
        String line = state.getCurrentLine() + ": ";
458
        homonymPart = homonymPart == null ? "" : homonymPart.trim();
459
        if (homonyms.isEmpty() && homonymPart.equals("")){
460
            return;
461
        }else if (homonymPart.equals("")){
462
            logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
463
            return;
464
        }
465
        homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
466
        String[] splits = homonymPart.split("\\]\\s*\\[");
467
        if (splits.length != homonyms.size()){
468
            logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
469
            return;
470
        }
471
        int i = 0;
472
        for (String split : splits){
473
            split = split.replaceAll("^non\\s+", "");
474
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
475
            if (newName.isProtectedTitleCache()){
476
                logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
477
            }
478
            newName.addRelationshipToName(homonyms.get(i), NameRelationshipType.LATER_HOMONYM(), null);
479
            i++;
480
        }
481
    }
482

    
483

    
484
    /**
485
     * @param newName
486
     * @param homotypicName
487
     * @return
488
     */
489
    private BotanicalName handleBasionym(BotanicalName currentBasionym, BotanicalName name2) {
490
        BotanicalName basionymName = currentBasionym;
491
        BotanicalName newCombination = name2;
492
        //switch if necessary
493
        if (basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
494
            basionymName = name2;
495
            newCombination = currentBasionym;
496
        }
497
//        newCombination.getHomotypicalGroup().removeGroupBasionym(xxx);
498
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
499
            newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
500
        }
501
        return basionymName;
502
    }
503

    
504

    
505
    /**
506
     * @param combinationAuthorship
507
     * @param basi
508
     * @return
509
     */
510
    private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
511
        if (author1 == null || author2 == null){
512
            return false;
513
        }else {
514
            return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
515
        }
516
    }
517

    
518

    
519
    /**
520
     * @param record
521
     * @param state
522
     * @param taxon
523
     */
524
    private void makeNotes(HashMap<String, String> record, CubaImportState state) {
525
        String notesStr = getValue(record, "(Notas)");
526
        if (notesStr == null){
527
            return;
528
        }else{
529
            Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
530
            //TODO
531
            annotation.setAnnotationType(AnnotationType.TECHNICAL());
532
            state.getCurrentTaxon().addAnnotation(annotation);
533
        }
534
    }
535

    
536

    
537
    /**
538
     * @param record
539
     * @param state
540
     * @param familyTaxon
541
     * @return
542
     */
543
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
544
        String taxonStr = getValue(record, "Taxón");
545
        if (taxonStr == null){
546
            return isSynonym ? state.getCurrentTaxon() : null;
547
        }
548
        boolean isAbsent = false;
549
        if (taxonStr.startsWith("[") && taxonStr.endsWith("]")){
550
            taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
551
            isAbsent = true;
552
        }
553
        taxonStr = normalizeStatus(taxonStr);
554

    
555
        BotanicalName botanicalName = (BotanicalName)nameParser.parseReferencedName(taxonStr, nc, Rank.SPECIES());
556
        Reference<?> sec = getSecReference(state);
557
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
558
        TaxonNode higherNode;
559
        if (botanicalName.isProtectedTitleCache()){
560
            logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr);
561
            higherNode = familyNode;
562
        }else{
563
            String genusStr = botanicalName.getGenusOrUninomial();
564
            Taxon genus = state.getHigherTaxon(genusStr);
565
            if (genus != null){
566
                higherNode = genus.getTaxonNodes().iterator().next();
567
            }else{
568
                BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
569
                name.setGenusOrUninomial(genusStr);
570
                genus = Taxon.NewInstance(name, sec);
571
                higherNode = familyNode.addChildTaxon(genus, null, null);
572
                state.putHigherTaxon(genusStr, genus);
573
            }
574
        }
575

    
576
        higherNode.addChildTaxon(taxon, null, null);
577

    
578
        return taxon;
579
    }
580

    
581
    /**
582
     * @param state
583
     * @return
584
     */
585
    private Reference<?> getSecReference(CubaImportState state) {
586
        Reference<?> result = state.getSecReference();
587
        if (result == null){
588
            result = ReferenceFactory.newDatabase();
589
            result.setTitle("Flora of Cuba");
590
            state.setSecReference(result);
591
        }
592
        return result;
593
    }
594

    
595

    
596
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
597
            "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub."};
598
    /**
599
     * @param taxonStr
600
     * @return
601
     */
602
    private String normalizeStatus(String taxonStr) {
603
        if (taxonStr == null){
604
            return null;
605
        }
606
        for (String nomStatusStr : nomStatusStrings){
607
            nomStatusStr = " " + nomStatusStr;
608
            if (taxonStr.endsWith(nomStatusStr)){
609
                taxonStr = taxonStr.replace(nomStatusStr, "," + nomStatusStr);
610
            }
611
        }
612
        taxonStr = taxonStr.replaceAll(HOMONYM_MARKER, "").trim();
613
        taxonStr = taxonStr.replaceAll(DOUBTFUL_MARKER, "").trim();
614
        return taxonStr;
615

    
616

    
617
    }
618

    
619

    
620
    /**
621
     * @param record
622
     * @param state
623
     * @return
624
     */
625
    private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
626
        String familyStr = getValue(record, "Fam.");
627
        if (familyStr == null){
628
            return null;
629
        }
630
        Taxon family = state.getHigherTaxon(familyStr);
631
        TaxonNode familyNode;
632
        if (family != null){
633
            familyNode = family.getTaxonNodes().iterator().next();
634
        }else{
635
            BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
636
            name.setGenusOrUninomial(familyStr);
637
            Reference<?> sec = getSecReference(state);
638
            Taxon taxon = Taxon.NewInstance(name, sec);
639
            ITaxonTreeNode rootNode = getClassification(state);
640
            familyNode = rootNode.addChildTaxon(taxon, sec, null);
641
            state.putHigherTaxon(familyStr, taxon);
642
        }
643

    
644
        return familyNode;
645
    }
646

    
647

    
648
    /**
649
     * @param state
650
     * @return
651
     */
652
    private TaxonNode getClassification(CubaImportState state) {
653
        Classification classification = state.getClassification();
654
        if (classification == null){
655
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
656
        }
657
        TaxonNode rootNode = state.getRootNode();
658
        if (rootNode == null){
659
            rootNode = getTaxonNodeService().find(plantaeUuid);
660
        }
661
        if (rootNode == null){
662
            Reference<?> sec = getSecReference(state);
663
            if (classification == null){
664
                String classificationName = state.getConfig().getClassificationName();
665
                //TODO
666
                Language language = Language.DEFAULT();
667
                classification = Classification.NewInstance(classificationName, sec, language);
668
                state.setClassification(classification);
669
                classification.setUuid(state.getConfig().getClassificationUuid());
670
                classification.getRootNode().setUuid(rootUuid);
671
            }
672

    
673
            BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
674
            plantaeName.setGenusOrUninomial("Plantae");
675
            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
676
            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
677
            plantaeNode.setUuid(plantaeUuid);
678
            state.setRootNode(plantaeNode);
679
            getClassificationService().save(classification);
680

    
681
            rootNode = plantaeNode;
682
        }
683
        return rootNode;
684
    }
685

    
686

    
687
    /**
688
     * @param record
689
     * @param originalKey
690
     * @return
691
     */
692
    private String getValue(HashMap<String, String> record, String originalKey) {
693
        String value = record.get(originalKey);
694
        if (! StringUtils.isBlank(value)) {
695
        	if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
696
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
697
        	return value;
698
        }else{
699
        	return null;
700
        }
701
    }
702

    
703

    
704

    
705
	/**
706
	 *  Stores taxa records in DB
707
	 */
708
	@Override
709
    protected void firstPass(CubaImportState state) {
710
	    boolean isSynonym = false;
711

    
712
        String line = state.getCurrentLine() + ": ";
713
        HashMap<String, String> record = state.getOriginalRecord();
714

    
715
        Set<String> keys = record.keySet();
716
        for (String key: keys) {
717
            if (! expectedKeys.contains(key)){
718
                logger.warn(line + "Unexpected Key: " + key);
719
            }
720
        }
721

    
722
        if (record.get("Fam.") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
723
            //second header line, don't handle
724
            return;
725
        }
726

    
727
        //Fam.
728
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
729
        if (familyTaxon == null){
730
            if (record.get("Taxón") != null){
731
                logger.warn(line + "Family not recognized but taxon exists: " + record.get("Taxón"));
732
                return;
733
            }else if (record.get("Syn.") == null){
734
                logger.warn(line + "Family not recognized but also no synonym exists");
735
                return;
736
            }else{
737
                isSynonym = true;
738
            }
739
        }
740

    
741
        //(Fam.)
742
        //TODO
743

    
744
        //Taxón
745
        Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonym);
746
        if (taxon == null && ! isSynonym){
747
            logger.warn(line + "taxon could not be created and is null");
748
            return;
749
        }
750
        state.setCurrentTaxon(taxon);
751

    
752
        //(Notas)
753
        makeNotes(record, state);
754

    
755
        //Syn.
756
        makeSynonyms(record, state);
757

    
758
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
759
        makeCubanDistribution(record, state);
760

    
761

    
762
//        "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
763
//        "CuC","VC","Ci","SS","CA","Cam","LT",
764
//        "CuE","Gr","Ho","SC","Gu",
765
        makeProvincesDistribution(record, state);
766

    
767
//    "Esp","Ja","PR","Men","Bah","Cay",
768
//    "AmN","AmC","AmS","VM"});
769
      makeOtherAreasDistribution(record, state);
770

    
771

    
772

    
773
        state.setHighestStatusForTaxon(null);
774

    
775
		return;
776
    }
777

    
778

    
779

    
780
	/**
781
     * @param record
782
     * @param state
783
     * @param taxon
784
     */
785
    // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
786
//  "CuC","VC","Ci","SS","CA","Cam","LT",
787
//  "CuE","Gr","Ho","SC","Gu",
788
    private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
789
        List<String> areaKeys = Arrays.asList(new String[]{
790
                "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
791
                "CuC","VC","Ci","SS","CA","Cam","LT",
792
                "CuE","Gr","Ho","SC","Gu",
793
                });
794
        for (String areaKey : areaKeys){
795
            state.setCubanProvince(true);
796
            makeSingleProvinceDistribution(areaKey, record, state);
797
        }
798
    }
799

    
800
    private void makeOtherAreasDistribution(HashMap<String, String> record, CubaImportState state) {
801
        List<String> areaKeys = Arrays.asList(new String[]{
802
                "Esp","Ja","PR","Men","Bah","Cay",
803
                "AmN","AmC","AmS","VM"});
804
        for (String areaKey : areaKeys){
805
            state.setCubanProvince(false);
806
            makeSingleProvinceDistribution(areaKey, record, state);
807
        }
808
    }
809

    
810

    
811
    /**
812
     * @param areaKey
813
     * @param record
814
     * @param state
815
     * @param taxon
816
     */
817
    private void makeSingleProvinceDistribution(String areaKey,
818
            HashMap<String, String> record,
819
            CubaImportState state) {
820
        try {
821
            UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
822
            if (areaUuid == null){
823
                logger.warn("Area not recognized: " + areaKey);
824
                return;
825
            }
826
            if (record.get(areaKey)==null){
827
                return; //no status defined
828
            }
829

    
830
            NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
831
            if (area == null){
832
                logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
833
            }
834
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
835
            PresenceAbsenceTerm status =  makeProvinceStatus(areaKey, record, state);
836
            if (status == null){
837
                logger.warn(state.getCurrentLine() + ": Province distribution status could not be defined: " + record.get(areaKey));
838
            }
839
            Distribution distribution = Distribution.NewInstance(area, status);
840
            desc.addElement(distribution);
841
        } catch (UndefinedTransformerMethodException e) {
842
            e.printStackTrace();
843
        }
844

    
845
    }
846

    
847

    
848
    /**
849
     * @param areaKey
850
     * @param record
851
     * @param state
852
     * @param highestStatus
853
     * @return
854
     * @throws UndefinedTransformerMethodException
855
     */
856
    private PresenceAbsenceTerm makeProvinceStatus(String areaKey,
857
            HashMap<String, String> record,
858
            CubaImportState state) throws UndefinedTransformerMethodException {
859

    
860
        String statusStr = record.get(areaKey);
861
        if (statusStr == null){
862
            return null;
863
        }
864
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
865
        if (status == null){
866
            PresenceAbsenceTerm highestStatus = state.getHighestStatusForTaxon();
867
            if (state.isCubanProvince() && isMinus(statusStr)){
868
                getAbsenceTermForStatus(state, highestStatus);
869
            }else if (! state.isCubanProvince() && isMinus(statusStr)){
870
                status = state.getTransformer().getPresenceTermByKey("");
871
            }else{
872
                UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
873
                status = getPresenceTerm(state, statusUuid, null, null, null, false);
874
            }
875
        }
876

    
877
        return status;
878
    }
879

    
880

    
881
    /**
882
     * @param highestStatus
883
     * @throws UndefinedTransformerMethodException
884
     */
885
    private PresenceAbsenceTerm getAbsenceTermForStatus(CubaImportState state, PresenceAbsenceTerm highestStatus) throws UndefinedTransformerMethodException {
886
        if (highestStatus == null){
887
            logger.warn(state.getCurrentLine() + ": Highest status not defined");
888
            return null;
889
        }
890
        PresenceAbsenceTerm result = null;
891
        if (highestStatus.equals(getStatus(state, "E"))){
892
            result = getStatus(state, "-E");
893
        }else if (highestStatus.getUuid().equals(state.getTransformer().getPresenceTermUuid("Ind.")) || highestStatus.equals(PresenceAbsenceTerm.NATIVE())){
894
            result = getStatus(state, "-Ind.");
895
        }else if (highestStatus.equals(getStatus(state, "Ind.?"))){
896
            result = getStatus(state, "-Ind.?");  //TODO
897
        }else if (highestStatus.equals(getStatus(state, "N"))){
898
            result = getStatus(state, "-N");
899
        }else if (highestStatus.equals(getStatus(state, "P"))){
900
            result = getStatus(state, "-P");
901
        }else if (highestStatus.equals(getStatus(state, "A"))){
902
            result = getStatus(state, "-A");
903
        }else if (highestStatus.equals(getStatus(state, "C"))){
904
            result = getStatus(state, "-C");
905
        }
906
        logger.warn(state.getCurrentLine() + ": Absent province status could not be defined for highest status " + highestStatus.getTitleCache());
907
        return result;
908
    }
909

    
910

    
911
    /**
912
     * @param string
913
     * @return
914
     * @throws UndefinedTransformerMethodException
915
     */
916
    private PresenceAbsenceTerm getStatus(CubaImportState state, String key) throws UndefinedTransformerMethodException {
917
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(key);
918
        if (status == null){
919
            UUID statusUuid = state.getTransformer().getPresenceTermUuid(key);
920
            status = getPresenceTerm(state, statusUuid, null, null, null, false);
921
        }
922
        return status;
923
    }
924

    
925

    
926
    /**
927
	 *  Stores parent-child, synonym and common name relationships
928
	 */
929
	@Override
930
    protected void secondPass(CubaImportState state) {
931
//		CyprusRow cyprusRow = state.getCyprusRow();
932
		return;
933
	}
934

    
935

    
936
    @Override
937
    protected boolean isIgnore(CubaImportState state) {
938
        return ! state.getConfig().isDoTaxa();
939
    }
940

    
941
    @Override
942
    protected boolean doCheck(CubaImportState state) {
943
        logger.warn("DoCheck not yet implemented for CubaExcelImport");
944
        return true;
945
    }
946

    
947
}
(1-1/5)