Project

General

Profile

Download (33.3 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.cuba;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Set;
17
import java.util.UUID;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28
import eu.etaxonomy.cdm.model.agent.Team;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.Annotation;
31
import eu.etaxonomy.cdm.model.common.AnnotationType;
32
import eu.etaxonomy.cdm.model.common.Language;
33
import eu.etaxonomy.cdm.model.description.Distribution;
34
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
36
import eu.etaxonomy.cdm.model.location.NamedArea;
37
import eu.etaxonomy.cdm.model.name.BotanicalName;
38
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
39
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.reference.Reference;
44
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
45
import eu.etaxonomy.cdm.model.taxon.Classification;
46
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
47
import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
48
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
52
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
53

    
54
/**
55
 * @author a.mueller
56
 * @created 05.01.2016
57
 */
58

    
59
@Component
60
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
61
    private static final long serialVersionUID = -747486709409732371L;
62
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
63

    
64
    private static final String HOMONYM_MARKER = ".*\\s+homon.?$";
65
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
66

    
67

    
68
    private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
69
    private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
70

    
71
    private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
72
    private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
73

    
74
    private  static List<String> expectedKeys= Arrays.asList(new String[]{"Fam.","(Fam.)","Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
75

    
76
	@Override
77
    protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
78
	    //we do everything in firstPass here
79
    	return;
80
    }
81

    
82

    
83
    /**
84
     * @param record
85
     * @param state
86
     * @param taxon
87
     */
88
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
89
        try {
90
            NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
91
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
92
            List<PresenceAbsenceTerm> statuss =  makeCubanStatus(record, state);
93
            for (PresenceAbsenceTerm status : statuss){
94
                Distribution distribution = Distribution.NewInstance(cuba, status);
95
                desc.addElement(distribution);
96
            }
97
        } catch (UndefinedTransformerMethodException e) {
98
            e.printStackTrace();
99
        }
100
    }
101

    
102

    
103
    /**
104
     * @param record
105
     * @param state
106
     * @return
107
     * @throws UndefinedTransformerMethodException
108
     */
109
    private List<PresenceAbsenceTerm> makeCubanStatus(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
110
        boolean isAbsent = false;  //TODO
111

    
112
        String line = state.getCurrentLine() + ": ";
113
        List<PresenceAbsenceTerm> result = new ArrayList<>();
114

    
115
        String endemicStr = getValue(record, "End");
116
        String indigenousStr = getValue(record, "Ind");
117
        String indigenousDoubtStr = getValue(record, "Ind? D");
118
        String naturalisedStr = getValue(record, "Nat");
119
        String dudStr = getValue(record, "Dud P");
120
        String advStr = getValue(record, "Adv");
121
        String cultStr = getValue(record, "Cult C");
122

    
123
        if (endemicStr != null){
124
            if(endemicStr.equals("+")){
125
                PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
126
                result.add(endemicState);
127
            }else if(isMinus(endemicStr)){
128
                UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
129
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
130
                result.add(endemicState);
131
            }else{
132
                logger.warn(line + "Endemic not recognized: " + endemicStr);
133
            }
134
        }
135
        if (indigenousStr != null){
136
            if(indigenousStr.equals("+")){
137
                UUID indigenousUuid = state.getTransformer().getPresenceTermUuid("Ind.");
138
                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
139
                result.add(indigenousState);
140
            }else if(isMinus(indigenousStr)){
141
                PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("-Ind.");
142
                result.add(haturalizedState);
143
            }else if(indigenousStr.equals("?")){
144
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("?Ind.");
145
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
146
                result.add(indigenousDoubtState);
147
            }else{
148
                logger.warn(line + "Indigenous not recognized: " + indigenousStr);
149
            }
150
        }
151
        if(indigenousDoubtStr != null){
152
            if(indigenousDoubtStr.equals("D")){
153
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("Ind.?");
154
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
155
                result.add(indigenousDoubtState);
156
            }else{
157
                logger.warn(line + "Indigenous doubtful not recognized: " + indigenousDoubtStr);
158
            }
159
        }
160
        if(naturalisedStr != null){
161
            if(naturalisedStr.equals("N")){
162
                  PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
163
                  result.add(haturalizedState);
164
            }else if(isMinus(naturalisedStr)){
165
                UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
166
                PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
167
                result.add(naturalisedErrorState);
168
            }else if(naturalisedStr.equals("?")){
169
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
170
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
171
                result.add(naturalisedDoubtState);
172
            }else{
173
                logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
174
            }
175
        }
176
        if(dudStr != null){
177
            if(dudStr.equals("P")){
178
                UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
179
                PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
180
                result.add(dudState);
181
            }else if(isMinus(dudStr)){
182
                UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
183
                PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
184
                result.add(nonNativeErrorState);
185
            }else if(dudStr.equals("?")){
186
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
187
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
188
                result.add(naturalisedDoubtState);
189
            }else{
190
                logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
191
            }
192
        }
193
        if(advStr != null){
194
            if(advStr.equals("A")){
195
                UUID advUuid = state.getTransformer().getPresenceTermUuid("Adv.");
196
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
197
                result.add(advState);
198
            }else if(isMinus(advStr)){
199
                UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
200
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
201
                result.add(advState);
202
            }else{
203
                logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
204
            }
205
        }else if(cultStr != null){
206
            if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
207
                logger.warn("'cultivated' not recognized: " + cultStr);
208
            }else if(cultStr.equals("C")){
209
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
210
                result.add(cultivatedState);
211
            }else if(cultStr.equals("?")){
212
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
213
                result.add(cultivatedState);
214
            }else if(cultStr.equals("(C)")){
215
                UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
216
                PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
217
                result.add(cultivatedState);
218
            }else if(isMinus(cultStr)){
219
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
220
                result.add(cultivatedState);
221
            }else{
222
                logger.warn(line + "'cultivated' not recognized: " + cultStr);
223
            }
224
        }
225

    
226
        return result;
227
    }
228

    
229

    
230
    /**
231
     * @param indigenousStr
232
     * @return
233
     */
234
    private boolean isMinus(String str) {
235
        return str.equals("-") || str.equals("–");
236
    }
237

    
238

    
239
    /**
240
     * @param indigenousStr
241
     * @return
242
     */
243
    private boolean checkPlusMinusDoubt(String str) {
244
        return str.equals("+") || isMinus(str)|| str.equals("?");
245
    }
246

    
247

    
248
    /**
249
     * @param indigenousStr
250
     * @param indigenousDoubtStr
251
     * @param naturalisedStr
252
     * @param dudStr
253
     * @param advStr
254
     * @param cultStr
255
     */
256
    private boolean checkAllNull(String ... others) {
257
        for (String other : others){
258
            if (other != null){
259
                return false;
260
            }
261
        }
262
        return true;
263
    }
264

    
265

    
266
    private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
267
//    String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
268
//                    + "(\\((.{6,})\\))?";
269
    private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
270
                                                     +"(\\((.{6,})\\))?";
271
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
272
            +"(\\((.{6,})\\))?";
273
    private static final String missapliedRegExStr = "“(.*{5,})”\\s+(auct\\.(\\sFC\\-S)?(\\s+p\\.\\s*p\\.)?|sensu\\s+.{2,})";
274
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
275
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
276

    
277
    private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
278
    private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
279
    private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
280
    private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
281

    
282
    /**
283
     * @param record
284
     * @param state
285
     * @param taxon
286
     */
287
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state) {
288
//        boolean forAccepted = true;
289
        String synonymStr = record.get("Syn.");
290
        String line = state.getCurrentLine() + ": ";
291

    
292
        if (synonymStr == null){
293
            //TODO test that this is not a synonym only line
294
            return;
295
        }
296
        synonymStr = synonymStr.trim();
297

    
298
//        String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
299
//        String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
300

    
301
//        Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
302

    
303
        Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
304
        Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
305
        Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
306
        Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
307

    
308
        List<BotanicalName> homonyms = new ArrayList<>();
309
        if (missapliedMatcher.matches()){
310
            String firstPart = missapliedMatcher.group(1);
311
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
312

    
313
            String secondPart = missapliedMatcher.group(2);
314
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
315
            if (secondPart.startsWith("sensu")){
316
                secondPart = secondPart.substring(5).trim();
317
                if (secondPart.contains(" ")){
318
                    logger.warn(line + "Second part contains more than 1 word. Check if this is correct: " + secondPart);
319
                }
320
                Reference<?> sensu = ReferenceFactory.newGeneric();
321
                Team team = Team.NewTitledInstance(secondPart, null);
322
                sensu.setAuthorship(team);
323
                misappliedNameTaxon.setSec(sensu);
324
            }else if (secondPart.matches("auct.((\\s+p\\.\\s*p\\.)|(\\sFC\\-S))?")){
325
                secondPart = secondPart.replace("p. p.", "p.p.");
326
                misappliedNameTaxon.setAppendedPhrase(secondPart);
327
            }else{
328
                logger.warn(line + "Misapplied second part not recognized: " + secondPart);
329
            }
330
            //TODO
331
            Reference<?> relRef = null;
332
            state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
333
        }else if (nomInvalMatcher.matches()){
334
            String firstPart = nomInvalMatcher.group(1);
335
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
336
            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
337
            name.addStatus(status);
338
            state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
339
        }else if (acceptedMatcher.matches()){
340
            String firstPart = acceptedMatcher.group(1);
341
            String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
342
            handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
343
        }else if(heterotypicMatcher.matches()){
344
            String firstPart = heterotypicMatcher.group(1).trim();
345
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
346
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
347
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
348
            boolean isHomonym = firstPart.trim().matches(HOMONYM_MARKER);
349
            firstPart = normalizeStatus(firstPart);
350
            BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
351
            if (synName.isProtectedTitleCache()){
352
                logger.warn(line + "heterotypic base synonym could not be parsed correctly:" + firstPart);
353
            }
354
            if (isHomonym){
355
                homonyms.add(synName);
356
            }
357
            SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
358
            sr.getSynonym().setDoubtful(isDoubtful);
359
            handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
360
        }else{
361
            logger.warn(line + "Synonym entry does not match: " + synonymStr);
362
        }
363
    }
364

    
365

    
366

    
367
    /**
368
     * @param synonymStr
369
     * @param state
370
     * @param homonyms
371
     * @param homonymPart
372
     * @param isDoubtful
373
     * @param taxon
374
     * @param homotypicalGroup
375
     */
376
    private void handleHomotypicGroup(String homotypicStr,
377
            CubaImportState state,
378
            BotanicalName homotypicName,
379
            boolean isHeterotypic,
380
            List<BotanicalName> homonyms,
381
            String homonymPart,
382
            boolean isDoubtful) {
383

    
384
        if (homotypicStr == null){
385
            return;
386
        }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
387
            homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
388
        }
389

    
390
        BotanicalName currentBasionym = homotypicName;
391
        String[] splits = homotypicStr.split("\\s*,\\s*");
392
        for (String split : splits){
393
            boolean isHomonym = split.trim().matches(HOMONYM_MARKER);
394
            String singleName = normalizeStatus(split);
395
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
396
            if (newName.isProtectedTitleCache()){
397
                logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
398
            }
399
            if (isHomonym){
400
                homonyms.add(newName);
401
            }
402
            if (isHeterotypic){
403
                SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicName.getHomotypicalGroup(), null, null);
404
                sr.getSynonym().setDoubtful(isDoubtful);
405
//                newName.addBasionym(homotypicName);
406
                currentBasionym = handleBasionym(currentBasionym, newName);
407
            }else{
408
                state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
409
                handleBasionym(currentBasionym, newName);
410
            }
411
        }
412
        makeHomonyms(homonyms, homonymPart, state);
413
    }
414

    
415

    
416
    /**
417
     * @param homonyms
418
     * @param homonymPart
419
     * @param state
420
     */
421
    private void makeHomonyms(List<BotanicalName> homonyms, String homonymPart, CubaImportState state) {
422
        String line = state.getCurrentLine() + ": ";
423
        homonymPart = homonymPart == null ? "" : homonymPart.trim();
424
        if (homonyms.isEmpty() && homonymPart.equals("")){
425
            return;
426
        }else if (homonymPart.equals("")){
427
            logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
428
            return;
429
        }
430
        homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
431
        String[] splits = homonymPart.split("\\]\\s*\\[");
432
        if (splits.length != homonyms.size()){
433
            logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
434
            return;
435
        }
436
        int i = 0;
437
        for (String split : splits){
438
            split = split.replaceAll("^non\\s+", "");
439
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
440
            if (newName.isProtectedTitleCache()){
441
                logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
442
            }
443
            newName.addRelationshipToName(homonyms.get(i), NameRelationshipType.LATER_HOMONYM(), null);
444
            i++;
445
        }
446
    }
447

    
448

    
449
    /**
450
     * @param newName
451
     * @param homotypicName
452
     * @return
453
     */
454
    private BotanicalName handleBasionym(BotanicalName currentBasionym, BotanicalName name2) {
455
        BotanicalName basionymName = currentBasionym;
456
        BotanicalName newCombination = name2;
457
        //switch if necessary
458
        if (basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
459
            basionymName = name2;
460
            newCombination = currentBasionym;
461
        }
462
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
463
            newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
464
        }
465
        return basionymName;
466
    }
467

    
468

    
469
    /**
470
     * @param combinationAuthorship
471
     * @param basi
472
     * @return
473
     */
474
    private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
475
        if (author1 == null || author2 == null){
476
            return false;
477
        }else {
478
            return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
479
        }
480
    }
481

    
482

    
483
    /**
484
     * @param record
485
     * @param state
486
     * @param taxon
487
     */
488
    private void makeNotes(HashMap<String, String> record, CubaImportState state) {
489
        String notesStr = getValue(record, "(Notas)");
490
        if (notesStr == null){
491
            return;
492
        }else{
493
            Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
494
            //TODO
495
            annotation.setAnnotationType(AnnotationType.EDITORIAL());
496
            state.getCurrentTaxon().addAnnotation(annotation);
497
        }
498
    }
499

    
500

    
501
    /**
502
     * @param record
503
     * @param state
504
     * @param familyTaxon
505
     * @return
506
     */
507
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
508
        String taxonStr = getValue(record, "Taxón");
509
        if (taxonStr == null){
510
            return isSynonym ? state.getCurrentTaxon() : null;
511
        }
512
        boolean isAbsent = false;
513
        if (taxonStr.startsWith("[") && taxonStr.endsWith("]")){
514
            taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
515
            isAbsent = true;
516
        }
517
        taxonStr = normalizeStatus(taxonStr);
518

    
519
        BotanicalName botanicalName = (BotanicalName)nameParser.parseReferencedName(taxonStr, nc, Rank.SPECIES());
520
        Reference<?> sec = getSecReference(state);
521
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
522
        TaxonNode higherNode;
523
        if (botanicalName.isProtectedTitleCache()){
524
            logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr);
525
            higherNode = familyNode;
526
        }else{
527
            String genusStr = botanicalName.getGenusOrUninomial();
528
            Taxon genus = state.getHigherTaxon(genusStr);
529
            if (genus != null){
530
                higherNode = genus.getTaxonNodes().iterator().next();
531
            }else{
532
                BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
533
                name.setGenusOrUninomial(genusStr);
534
                genus = Taxon.NewInstance(name, sec);
535
                higherNode = familyNode.addChildTaxon(genus, null, null);
536
                state.putHigherTaxon(genusStr, genus);
537
            }
538
        }
539

    
540
        higherNode.addChildTaxon(taxon, null, null);
541

    
542
        return taxon;
543
    }
544

    
545
    /**
546
     * @param state
547
     * @return
548
     */
549
    private Reference<?> getSecReference(CubaImportState state) {
550
        Reference<?> result = state.getSecReference();
551
        if (result == null){
552
            result = ReferenceFactory.newDatabase();
553
            result.setTitle("Flora of Cuba");
554
            state.setSecReference(result);
555
        }
556
        return result;
557
    }
558

    
559

    
560
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
561
            "nom. rej.","nom. cons. prop.","nom. altern."};
562
    /**
563
     * @param taxonStr
564
     * @return
565
     */
566
    private String normalizeStatus(String taxonStr) {
567
        if (taxonStr == null){
568
            return null;
569
        }
570
        for (String nomStatusStr : nomStatusStrings){
571
            nomStatusStr = " " + nomStatusStr;
572
            if (taxonStr.endsWith(nomStatusStr)){
573
                taxonStr = taxonStr.replace(nomStatusStr, "," + nomStatusStr);
574
            }
575
        }
576
        taxonStr = taxonStr.replaceAll(HOMONYM_MARKER, "").trim();
577
        taxonStr = taxonStr.replaceAll(DOUBTFUL_MARKER, "").trim();
578
        return taxonStr;
579

    
580

    
581
    }
582

    
583

    
584
    /**
585
     * @param record
586
     * @param state
587
     * @return
588
     */
589
    private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
590
        String familyStr = getValue(record, "Fam.");
591
        if (familyStr == null){
592
            return null;
593
        }
594
        Taxon family = state.getHigherTaxon(familyStr);
595
        TaxonNode familyNode;
596
        if (family != null){
597
            familyNode = family.getTaxonNodes().iterator().next();
598
        }else{
599
            BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
600
            name.setGenusOrUninomial(familyStr);
601
            Reference<?> sec = getSecReference(state);
602
            Taxon taxon = Taxon.NewInstance(name, sec);
603
            ITaxonTreeNode rootNode = getClassification(state);
604
            familyNode = rootNode.addChildTaxon(taxon, sec, null);
605
            state.putHigherTaxon(familyStr, taxon);
606
        }
607

    
608
        return familyNode;
609
    }
610

    
611

    
612
    /**
613
     * @param state
614
     * @return
615
     */
616
    private TaxonNode getClassification(CubaImportState state) {
617
        Classification classification = state.getClassification();
618
        if (classification == null){
619
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
620
        }
621
        TaxonNode rootNode = state.getRootNode();
622
        if (rootNode == null){
623
            rootNode = getTaxonNodeService().find(plantaeUuid);
624
        }
625
        if (rootNode == null){
626
            Reference<?> sec = getSecReference(state);
627
            if (classification == null){
628
                String classificationName = state.getConfig().getClassificationName();
629
                //TODO
630
                Language language = Language.DEFAULT();
631
                classification = Classification.NewInstance(classificationName, sec, language);
632
                state.setClassification(classification);
633
                classification.setUuid(state.getConfig().getClassificationUuid());
634
                classification.getRootNode().setUuid(rootUuid);
635
            }
636

    
637
            BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
638
            plantaeName.setGenusOrUninomial("Plantae");
639
            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
640
            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
641
            plantaeNode.setUuid(plantaeUuid);
642
            state.setRootNode(plantaeNode);
643
            getClassificationService().save(classification);
644

    
645
            rootNode = plantaeNode;
646
        }
647
        return rootNode;
648
    }
649

    
650

    
651
    /**
652
     * @param record
653
     * @param originalKey
654
     * @return
655
     */
656
    private String getValue(HashMap<String, String> record, String originalKey) {
657
        String value = record.get(originalKey);
658
        if (! StringUtils.isBlank(value)) {
659
        	if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
660
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
661
        	return value;
662
        }else{
663
        	return null;
664
        }
665
    }
666

    
667

    
668

    
669
	/**
670
	 *  Stores taxa records in DB
671
	 */
672
	@Override
673
    protected void firstPass(CubaImportState state) {
674
	    boolean isSynonym = false;
675

    
676
        int line = state.getCurrentLine();
677
        HashMap<String, String> record = state.getOriginalRecord();
678

    
679
        Set<String> keys = record.keySet();
680
        for (String key: keys) {
681
            if (! expectedKeys.contains(key)){
682
                logger.warn("Unexpected Key: " + key);
683
            }
684
        }
685

    
686
        if (record.get("Fam.") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
687
            //second header line, don't handle
688
            return;
689
        }
690

    
691
        //Fam.
692
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
693
        if (familyTaxon == null){
694
            if (record.get("Taxón") != null){
695
                logger.warn(line + ": Family not recognized but taxon exists:" + record.get("Taxón"));
696
                return;
697
            }else if (record.get("Syn.") == null){
698
                logger.warn(line + ": Family not recognized but also no synonym exists");
699
                return;
700
            }else{
701
                isSynonym = true;
702
            }
703
        }
704

    
705
        //(Fam.)
706
        //TODO
707

    
708
        //Taxón
709
        Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonym);
710
        if (taxon == null && ! isSynonym){
711
            logger.warn(line + ": taxon could not be created and is null");
712
            return;
713
        }
714
        state.setCurrentTaxon(taxon);
715

    
716
        //(Notas)
717
        makeNotes(record, state);
718

    
719
        //Syn.
720
        makeSynonyms(record, state);
721

    
722
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
723
        makeCubanDistribution(record, state);
724

    
725

    
726
        // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
727
//        "CuC","VC","Ci","SS","CA","Cam","LT",
728
//        "CuE","Gr","Ho","SC","Gu",
729
//      "Esp","Ja","PR","Men","Bah","Cay",
730
//      "AmN","AmC","AmS","VM"});
731
        makeProvincesDistribution(record, state);
732

    
733
		return;
734
    }
735

    
736

    
737

    
738
	/**
739
     * @param record
740
     * @param state
741
     * @param taxon
742
     */
743
    // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
744
//  "CuC","VC","Ci","SS","CA","Cam","LT",
745
//  "CuE","Gr","Ho","SC","Gu",
746
    private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
747
        List<String> areaKeys = Arrays.asList(new String[]{
748
                "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
749
                "CuC","VC","Ci","SS","CA","Cam","LT",
750
                "CuE","Gr","Ho","SC","Gu",
751
                "Esp","Ja","PR","Men","Bah","Cay",
752
                "AmN","AmC","AmS","VM"});
753
        for (String areaKey : areaKeys){
754
            makeSingleProvinceDistribution(areaKey, record, state);
755
        }
756

    
757
    }
758

    
759

    
760
    /**
761
     * @param areaKey
762
     * @param record
763
     * @param state
764
     * @param taxon
765
     */
766
    private void makeSingleProvinceDistribution(String areaKey,
767
            HashMap<String, String> record,
768
            CubaImportState state) {
769
        try {
770
            UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
771
            if (areaUuid == null){
772
                logger.warn("Area not recognized: " + areaKey);
773
                return;
774
            }
775
            if (record.get(areaKey)==null){
776
                return; //no status defined
777
            }
778

    
779
            NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
780
            if (area == null){
781
                logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
782
            }
783
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
784
            PresenceAbsenceTerm status =  makeProvinceStatus(areaKey, record, state);
785
            if (status == null){
786
                logger.warn(state.getCurrentLine() + ": Distribution Status could not be defined: " + record.get(areaKey));
787
            }
788
            Distribution distribution = Distribution.NewInstance(area, status);
789
            desc.addElement(distribution);
790
        } catch (UndefinedTransformerMethodException e) {
791
            e.printStackTrace();
792
        }
793

    
794
    }
795

    
796

    
797
    /**
798
     * @param areaKey
799
     * @param record
800
     * @param state
801
     * @return
802
     * @throws UndefinedTransformerMethodException
803
     */
804
    private PresenceAbsenceTerm makeProvinceStatus(String areaKey, HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
805
        String statusStr = record.get(areaKey);
806
        if (statusStr == null){
807
            return null;
808
        }
809
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
810
        if (status == null){
811
            UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
812
            status = getPresenceTerm(state, statusUuid, null, null, null, false);
813
        }
814
        return status;
815
    }
816

    
817

    
818
    /**
819
	 *  Stores parent-child, synonym and common name relationships
820
	 */
821
	@Override
822
    protected void secondPass(CubaImportState state) {
823
//		CyprusRow cyprusRow = state.getCyprusRow();
824
		return;
825
	}
826

    
827

    
828
    @Override
829
    protected boolean isIgnore(CubaImportState state) {
830
        return ! state.getConfig().isDoTaxa();
831
    }
832

    
833
    @Override
834
    protected boolean doCheck(CubaImportState state) {
835
        logger.warn("DoCheck not yet implemented for CubaExcelImport");
836
        return true;
837
    }
838

    
839
}
(1-1/5)