Project

General

Profile

Download (33.4 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.cuba;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Set;
17
import java.util.UUID;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
20

    
21
import org.apache.commons.lang.StringUtils;
22
import org.apache.log4j.Logger;
23
import org.springframework.stereotype.Component;
24

    
25
import eu.etaxonomy.cdm.common.CdmUtils;
26
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28
import eu.etaxonomy.cdm.model.agent.Team;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.Annotation;
31
import eu.etaxonomy.cdm.model.common.AnnotationType;
32
import eu.etaxonomy.cdm.model.common.Language;
33
import eu.etaxonomy.cdm.model.description.Distribution;
34
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
36
import eu.etaxonomy.cdm.model.location.NamedArea;
37
import eu.etaxonomy.cdm.model.name.BotanicalName;
38
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
39
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.reference.Reference;
44
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
45
import eu.etaxonomy.cdm.model.taxon.Classification;
46
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
47
import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
48
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
49
import eu.etaxonomy.cdm.model.taxon.Taxon;
50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
52
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
53

    
54
/**
55
 * @author a.mueller
56
 * @created 05.01.2016
57
 */
58

    
59
@Component
60
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
61
    private static final long serialVersionUID = -747486709409732371L;
62
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
63

    
64
    private static final String HOMONYM_MARKER = ".*\\s+homon.?$";
65
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
66

    
67

    
68
    private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
69
    private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
70

    
71
    private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
72
    private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
73

    
74
    private  static List<String> expectedKeys= Arrays.asList(new String[]{"Fam.","(Fam.)","Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
75

    
76
	@Override
77
    protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
78
	    //we do everything in firstPass here
79
    	return;
80
    }
81

    
82

    
83
    /**
84
     * @param record
85
     * @param state
86
     * @param taxon
87
     */
88
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
89
        try {
90
            NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
91
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
92
            List<PresenceAbsenceTerm> statuss =  makeCubanStatus(record, state);
93
            for (PresenceAbsenceTerm status : statuss){
94
                Distribution distribution = Distribution.NewInstance(cuba, status);
95
                desc.addElement(distribution);
96
            }
97
        } catch (UndefinedTransformerMethodException e) {
98
            e.printStackTrace();
99
        }
100
    }
101

    
102

    
103
    /**
104
     * @param record
105
     * @param state
106
     * @return
107
     * @throws UndefinedTransformerMethodException
108
     */
109
    private List<PresenceAbsenceTerm> makeCubanStatus(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
110
        boolean isAbsent = false;  //TODO
111

    
112
        String line = state.getCurrentLine() + ": ";
113
        List<PresenceAbsenceTerm> result = new ArrayList<>();
114

    
115
        String endemicStr = getValue(record, "End");
116
        String indigenousStr = getValue(record, "Ind");
117
        String indigenousDoubtStr = getValue(record, "Ind? D");
118
        String naturalisedStr = getValue(record, "Nat");
119
        String dudStr = getValue(record, "Dud P");
120
        String advStr = getValue(record, "Adv");
121
        String cultStr = getValue(record, "Cult C");
122

    
123
        if (endemicStr != null){
124
            if(endemicStr.equals("+")){
125
                PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
126
                result.add(endemicState);
127
            }else if(isMinus(endemicStr)){
128
                UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
129
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
130
                result.add(endemicState);
131
            }else{
132
                logger.warn(line + "Endemic not recognized: " + endemicStr);
133
            }
134
        }
135
        if (indigenousStr != null){
136
            if(indigenousStr.equals("+")){
137
                UUID indigenousUuid = state.getTransformer().getPresenceTermUuid("Ind.");
138
                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
139
                result.add(indigenousState);
140
            }else if(isMinus(indigenousStr)){
141
                PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("-Ind.");
142
                result.add(haturalizedState);
143
            }else if(indigenousStr.equals("?")){
144
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("?Ind.");
145
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
146
                result.add(indigenousDoubtState);
147
            }else{
148
                logger.warn(line + "Indigenous not recognized: " + indigenousStr);
149
            }
150
        }
151
        if(indigenousDoubtStr != null){
152
            if(indigenousDoubtStr.equals("D")){
153
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("Ind.?");
154
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
155
                result.add(indigenousDoubtState);
156
            }else{
157
                logger.warn(line + "Indigenous doubtful not recognized: " + indigenousDoubtStr);
158
            }
159
        }
160
        if(naturalisedStr != null){
161
            if(naturalisedStr.equals("N")){
162
                  PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
163
                  result.add(haturalizedState);
164
            }else if(isMinus(naturalisedStr)){
165
                UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
166
                PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
167
                result.add(naturalisedErrorState);
168
            }else if(naturalisedStr.equals("?")){
169
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
170
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
171
                result.add(naturalisedDoubtState);
172
            }else{
173
                logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
174
            }
175
        }
176
        if(dudStr != null){
177
            if(dudStr.equals("P")){
178
                UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
179
                PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
180
                result.add(dudState);
181
            }else if(isMinus(dudStr)){
182
                UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
183
                PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
184
                result.add(nonNativeErrorState);
185
            }else if(dudStr.equals("?")){
186
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
187
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
188
                result.add(naturalisedDoubtState);
189
            }else{
190
                logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
191
            }
192
        }
193
        if(advStr != null){
194
            if(advStr.equals("A")){
195
                UUID advUuid = state.getTransformer().getPresenceTermUuid("Adv.");
196
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
197
                result.add(advState);
198
            }else if(isMinus(advStr)){
199
                UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
200
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
201
                result.add(advState);
202
            }else{
203
                logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
204
            }
205
        }else if(cultStr != null){
206
            if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
207
                logger.warn("'cultivated' not recognized: " + cultStr);
208
            }else if(cultStr.equals("C")){
209
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
210
                result.add(cultivatedState);
211
            }else if(cultStr.equals("?")){
212
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
213
                result.add(cultivatedState);
214
            }else if(cultStr.equals("(C)")){
215
                UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
216
                PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
217
                result.add(cultivatedState);
218
            }else if(isMinus(cultStr)){
219
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
220
                result.add(cultivatedState);
221
            }else{
222
                logger.warn(line + "'cultivated' not recognized: " + cultStr);
223
            }
224
        }
225

    
226
        return result;
227
    }
228

    
229

    
230
    /**
231
     * @param indigenousStr
232
     * @return
233
     */
234
    private boolean isMinus(String str) {
235
        return str.equals("-") || str.equals("–");
236
    }
237

    
238

    
239
    /**
240
     * @param indigenousStr
241
     * @return
242
     */
243
    private boolean checkPlusMinusDoubt(String str) {
244
        return str.equals("+") || isMinus(str)|| str.equals("?");
245
    }
246

    
247

    
248
    /**
249
     * @param indigenousStr
250
     * @param indigenousDoubtStr
251
     * @param naturalisedStr
252
     * @param dudStr
253
     * @param advStr
254
     * @param cultStr
255
     */
256
    private boolean checkAllNull(String ... others) {
257
        for (String other : others){
258
            if (other != null){
259
                return false;
260
            }
261
        }
262
        return true;
263
    }
264

    
265

    
266
    private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
267
//    String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
268
//                    + "(\\((.{6,})\\))?";
269
    private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
270
                                                     +"(\\((.{6,})\\))?";
271
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
272
            +"(\\((.{6,})\\))?";
273
    private static final String auctRegExStr = "auct\\."
274
            +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S))?(\\s+p\\.\\s*p\\.)?";
275
    private static final String missapliedRegExStr = "“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
276
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
277
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
278

    
279
    private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
280
    private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
281
    private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
282
    private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
283

    
284
    /**
285
     * @param record
286
     * @param state
287
     * @param taxon
288
     */
289
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state) {
290
//        boolean forAccepted = true;
291
        String synonymStr = record.get("Syn.");
292
        String line = state.getCurrentLine() + ": ";
293

    
294
        if (synonymStr == null){
295
            //TODO test that this is not a synonym only line
296
            return;
297
        }
298
        synonymStr = synonymStr.trim();
299

    
300
//        String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
301
//        String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
302

    
303
//        Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
304

    
305
        Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
306
        Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
307
        Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
308
        Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
309

    
310
        List<BotanicalName> homonyms = new ArrayList<>();
311
        if (missapliedMatcher.matches()){
312
            String firstPart = missapliedMatcher.group(1);
313
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
314

    
315
            String secondPart = missapliedMatcher.group(2);
316
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
317
            if (secondPart.startsWith("sensu")){
318
                secondPart = secondPart.substring(5).trim();
319
                if (secondPart.contains(" ")){
320
                    logger.warn(line + "Second part contains more than 1 word. Check if this is correct: " + secondPart);
321
                }
322
                Reference<?> sensu = ReferenceFactory.newGeneric();
323
                Team team = Team.NewTitledInstance(secondPart, null);
324
                sensu.setAuthorship(team);
325
                misappliedNameTaxon.setSec(sensu);
326
            }else if (secondPart.matches(auctRegExStr)){
327
                secondPart = secondPart.replace("p. p.", "p.p.");
328
                misappliedNameTaxon.setAppendedPhrase(secondPart);
329
            }else{
330
                logger.warn(line + "Misapplied second part not recognized: " + secondPart);
331
            }
332
            //TODO
333
            Reference<?> relRef = null;
334
            state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
335
        }else if (nomInvalMatcher.matches()){
336
            String firstPart = nomInvalMatcher.group(1);
337
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
338
            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
339
            name.addStatus(status);
340
            state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
341
        }else if (acceptedMatcher.matches()){
342
            String firstPart = acceptedMatcher.group(1);
343
            String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
344
            handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
345
        }else if(heterotypicMatcher.matches()){
346
            String firstPart = heterotypicMatcher.group(1).trim();
347
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
348
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
349
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
350
            boolean isHomonym = firstPart.trim().matches(HOMONYM_MARKER);
351
            firstPart = normalizeStatus(firstPart);
352
            BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
353
            if (synName.isProtectedTitleCache()){
354
                logger.warn(line + "heterotypic base synonym could not be parsed correctly:" + firstPart);
355
            }
356
            if (isHomonym){
357
                homonyms.add(synName);
358
            }
359
            SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
360
            sr.getSynonym().setDoubtful(isDoubtful);
361
            handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
362
        }else{
363
            logger.warn(line + "Synonym entry does not match: " + synonymStr);
364
        }
365
    }
366

    
367

    
368

    
369
    /**
370
     * @param synonymStr
371
     * @param state
372
     * @param homonyms
373
     * @param homonymPart
374
     * @param isDoubtful
375
     * @param taxon
376
     * @param homotypicalGroup
377
     */
378
    private void handleHomotypicGroup(String homotypicStr,
379
            CubaImportState state,
380
            BotanicalName homotypicName,
381
            boolean isHeterotypic,
382
            List<BotanicalName> homonyms,
383
            String homonymPart,
384
            boolean isDoubtful) {
385

    
386
        if (homotypicStr == null){
387
            return;
388
        }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
389
            homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
390
        }
391

    
392
        BotanicalName currentBasionym = homotypicName;
393
        String[] splits = homotypicStr.split("\\s*,\\s*");
394
        for (String split : splits){
395
            boolean isHomonym = split.trim().matches(HOMONYM_MARKER);
396
            String singleName = normalizeStatus(split);
397
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
398
            if (newName.isProtectedTitleCache()){
399
                logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
400
            }
401
            if (isHomonym){
402
                homonyms.add(newName);
403
            }
404
            if (isHeterotypic){
405
                SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicName.getHomotypicalGroup(), null, null);
406
                sr.getSynonym().setDoubtful(isDoubtful);
407
//                newName.addBasionym(homotypicName);
408
                currentBasionym = handleBasionym(currentBasionym, newName);
409
            }else{
410
                state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
411
                handleBasionym(currentBasionym, newName);
412
            }
413
        }
414
        makeHomonyms(homonyms, homonymPart, state);
415
    }
416

    
417

    
418
    /**
419
     * @param homonyms
420
     * @param homonymPart
421
     * @param state
422
     */
423
    private void makeHomonyms(List<BotanicalName> homonyms, String homonymPart, CubaImportState state) {
424
        String line = state.getCurrentLine() + ": ";
425
        homonymPart = homonymPart == null ? "" : homonymPart.trim();
426
        if (homonyms.isEmpty() && homonymPart.equals("")){
427
            return;
428
        }else if (homonymPart.equals("")){
429
            logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
430
            return;
431
        }
432
        homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
433
        String[] splits = homonymPart.split("\\]\\s*\\[");
434
        if (splits.length != homonyms.size()){
435
            logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
436
            return;
437
        }
438
        int i = 0;
439
        for (String split : splits){
440
            split = split.replaceAll("^non\\s+", "");
441
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
442
            if (newName.isProtectedTitleCache()){
443
                logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
444
            }
445
            newName.addRelationshipToName(homonyms.get(i), NameRelationshipType.LATER_HOMONYM(), null);
446
            i++;
447
        }
448
    }
449

    
450

    
451
    /**
452
     * @param newName
453
     * @param homotypicName
454
     * @return
455
     */
456
    private BotanicalName handleBasionym(BotanicalName currentBasionym, BotanicalName name2) {
457
        BotanicalName basionymName = currentBasionym;
458
        BotanicalName newCombination = name2;
459
        //switch if necessary
460
        if (basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
461
            basionymName = name2;
462
            newCombination = currentBasionym;
463
        }
464
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
465
            newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
466
        }
467
        return basionymName;
468
    }
469

    
470

    
471
    /**
472
     * @param combinationAuthorship
473
     * @param basi
474
     * @return
475
     */
476
    private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
477
        if (author1 == null || author2 == null){
478
            return false;
479
        }else {
480
            return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
481
        }
482
    }
483

    
484

    
485
    /**
486
     * @param record
487
     * @param state
488
     * @param taxon
489
     */
490
    private void makeNotes(HashMap<String, String> record, CubaImportState state) {
491
        String notesStr = getValue(record, "(Notas)");
492
        if (notesStr == null){
493
            return;
494
        }else{
495
            Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
496
            //TODO
497
            annotation.setAnnotationType(AnnotationType.EDITORIAL());
498
            state.getCurrentTaxon().addAnnotation(annotation);
499
        }
500
    }
501

    
502

    
503
    /**
504
     * @param record
505
     * @param state
506
     * @param familyTaxon
507
     * @return
508
     */
509
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
510
        String taxonStr = getValue(record, "Taxón");
511
        if (taxonStr == null){
512
            return isSynonym ? state.getCurrentTaxon() : null;
513
        }
514
        boolean isAbsent = false;
515
        if (taxonStr.startsWith("[") && taxonStr.endsWith("]")){
516
            taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
517
            isAbsent = true;
518
        }
519
        taxonStr = normalizeStatus(taxonStr);
520

    
521
        BotanicalName botanicalName = (BotanicalName)nameParser.parseReferencedName(taxonStr, nc, Rank.SPECIES());
522
        Reference<?> sec = getSecReference(state);
523
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
524
        TaxonNode higherNode;
525
        if (botanicalName.isProtectedTitleCache()){
526
            logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr);
527
            higherNode = familyNode;
528
        }else{
529
            String genusStr = botanicalName.getGenusOrUninomial();
530
            Taxon genus = state.getHigherTaxon(genusStr);
531
            if (genus != null){
532
                higherNode = genus.getTaxonNodes().iterator().next();
533
            }else{
534
                BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
535
                name.setGenusOrUninomial(genusStr);
536
                genus = Taxon.NewInstance(name, sec);
537
                higherNode = familyNode.addChildTaxon(genus, null, null);
538
                state.putHigherTaxon(genusStr, genus);
539
            }
540
        }
541

    
542
        higherNode.addChildTaxon(taxon, null, null);
543

    
544
        return taxon;
545
    }
546

    
547
    /**
548
     * @param state
549
     * @return
550
     */
551
    private Reference<?> getSecReference(CubaImportState state) {
552
        Reference<?> result = state.getSecReference();
553
        if (result == null){
554
            result = ReferenceFactory.newDatabase();
555
            result.setTitle("Flora of Cuba");
556
            state.setSecReference(result);
557
        }
558
        return result;
559
    }
560

    
561

    
562
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
563
            "nom. rej.","nom. cons. prop.","nom. altern."};
564
    /**
565
     * @param taxonStr
566
     * @return
567
     */
568
    private String normalizeStatus(String taxonStr) {
569
        if (taxonStr == null){
570
            return null;
571
        }
572
        for (String nomStatusStr : nomStatusStrings){
573
            nomStatusStr = " " + nomStatusStr;
574
            if (taxonStr.endsWith(nomStatusStr)){
575
                taxonStr = taxonStr.replace(nomStatusStr, "," + nomStatusStr);
576
            }
577
        }
578
        taxonStr = taxonStr.replaceAll(HOMONYM_MARKER, "").trim();
579
        taxonStr = taxonStr.replaceAll(DOUBTFUL_MARKER, "").trim();
580
        return taxonStr;
581

    
582

    
583
    }
584

    
585

    
586
    /**
587
     * @param record
588
     * @param state
589
     * @return
590
     */
591
    private TaxonNode getFamilyTaxon(HashMap<String, String> record, CubaImportState state) {
592
        String familyStr = getValue(record, "Fam.");
593
        if (familyStr == null){
594
            return null;
595
        }
596
        Taxon family = state.getHigherTaxon(familyStr);
597
        TaxonNode familyNode;
598
        if (family != null){
599
            familyNode = family.getTaxonNodes().iterator().next();
600
        }else{
601
            BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
602
            name.setGenusOrUninomial(familyStr);
603
            Reference<?> sec = getSecReference(state);
604
            Taxon taxon = Taxon.NewInstance(name, sec);
605
            ITaxonTreeNode rootNode = getClassification(state);
606
            familyNode = rootNode.addChildTaxon(taxon, sec, null);
607
            state.putHigherTaxon(familyStr, taxon);
608
        }
609

    
610
        return familyNode;
611
    }
612

    
613

    
614
    /**
615
     * @param state
616
     * @return
617
     */
618
    private TaxonNode getClassification(CubaImportState state) {
619
        Classification classification = state.getClassification();
620
        if (classification == null){
621
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
622
        }
623
        TaxonNode rootNode = state.getRootNode();
624
        if (rootNode == null){
625
            rootNode = getTaxonNodeService().find(plantaeUuid);
626
        }
627
        if (rootNode == null){
628
            Reference<?> sec = getSecReference(state);
629
            if (classification == null){
630
                String classificationName = state.getConfig().getClassificationName();
631
                //TODO
632
                Language language = Language.DEFAULT();
633
                classification = Classification.NewInstance(classificationName, sec, language);
634
                state.setClassification(classification);
635
                classification.setUuid(state.getConfig().getClassificationUuid());
636
                classification.getRootNode().setUuid(rootUuid);
637
            }
638

    
639
            BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
640
            plantaeName.setGenusOrUninomial("Plantae");
641
            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
642
            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
643
            plantaeNode.setUuid(plantaeUuid);
644
            state.setRootNode(plantaeNode);
645
            getClassificationService().save(classification);
646

    
647
            rootNode = plantaeNode;
648
        }
649
        return rootNode;
650
    }
651

    
652

    
653
    /**
654
     * @param record
655
     * @param originalKey
656
     * @return
657
     */
658
    private String getValue(HashMap<String, String> record, String originalKey) {
659
        String value = record.get(originalKey);
660
        if (! StringUtils.isBlank(value)) {
661
        	if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
662
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
663
        	return value;
664
        }else{
665
        	return null;
666
        }
667
    }
668

    
669

    
670

    
671
	/**
672
	 *  Stores taxa records in DB
673
	 */
674
	@Override
675
    protected void firstPass(CubaImportState state) {
676
	    boolean isSynonym = false;
677

    
678
        int line = state.getCurrentLine();
679
        HashMap<String, String> record = state.getOriginalRecord();
680

    
681
        Set<String> keys = record.keySet();
682
        for (String key: keys) {
683
            if (! expectedKeys.contains(key)){
684
                logger.warn("Unexpected Key: " + key);
685
            }
686
        }
687

    
688
        if (record.get("Fam.") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
689
            //second header line, don't handle
690
            return;
691
        }
692

    
693
        //Fam.
694
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
695
        if (familyTaxon == null){
696
            if (record.get("Taxón") != null){
697
                logger.warn(line + ": Family not recognized but taxon exists: " + record.get("Taxón"));
698
                return;
699
            }else if (record.get("Syn.") == null){
700
                logger.warn(line + ": Family not recognized but also no synonym exists");
701
                return;
702
            }else{
703
                isSynonym = true;
704
            }
705
        }
706

    
707
        //(Fam.)
708
        //TODO
709

    
710
        //Taxón
711
        Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonym);
712
        if (taxon == null && ! isSynonym){
713
            logger.warn(line + ": taxon could not be created and is null");
714
            return;
715
        }
716
        state.setCurrentTaxon(taxon);
717

    
718
        //(Notas)
719
        makeNotes(record, state);
720

    
721
        //Syn.
722
        makeSynonyms(record, state);
723

    
724
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
725
        makeCubanDistribution(record, state);
726

    
727

    
728
        // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
729
//        "CuC","VC","Ci","SS","CA","Cam","LT",
730
//        "CuE","Gr","Ho","SC","Gu",
731
//      "Esp","Ja","PR","Men","Bah","Cay",
732
//      "AmN","AmC","AmS","VM"});
733
        makeProvincesDistribution(record, state);
734

    
735
		return;
736
    }
737

    
738

    
739

    
740
	/**
741
     * @param record
742
     * @param state
743
     * @param taxon
744
     */
745
    // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
746
//  "CuC","VC","Ci","SS","CA","Cam","LT",
747
//  "CuE","Gr","Ho","SC","Gu",
748
    private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
749
        List<String> areaKeys = Arrays.asList(new String[]{
750
                "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
751
                "CuC","VC","Ci","SS","CA","Cam","LT",
752
                "CuE","Gr","Ho","SC","Gu",
753
                "Esp","Ja","PR","Men","Bah","Cay",
754
                "AmN","AmC","AmS","VM"});
755
        for (String areaKey : areaKeys){
756
            makeSingleProvinceDistribution(areaKey, record, state);
757
        }
758

    
759
    }
760

    
761

    
762
    /**
763
     * @param areaKey
764
     * @param record
765
     * @param state
766
     * @param taxon
767
     */
768
    private void makeSingleProvinceDistribution(String areaKey,
769
            HashMap<String, String> record,
770
            CubaImportState state) {
771
        try {
772
            UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
773
            if (areaUuid == null){
774
                logger.warn("Area not recognized: " + areaKey);
775
                return;
776
            }
777
            if (record.get(areaKey)==null){
778
                return; //no status defined
779
            }
780

    
781
            NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
782
            if (area == null){
783
                logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
784
            }
785
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
786
            PresenceAbsenceTerm status =  makeProvinceStatus(areaKey, record, state);
787
            if (status == null){
788
                logger.warn(state.getCurrentLine() + ": Distribution Status could not be defined: " + record.get(areaKey));
789
            }
790
            Distribution distribution = Distribution.NewInstance(area, status);
791
            desc.addElement(distribution);
792
        } catch (UndefinedTransformerMethodException e) {
793
            e.printStackTrace();
794
        }
795

    
796
    }
797

    
798

    
799
    /**
800
     * @param areaKey
801
     * @param record
802
     * @param state
803
     * @return
804
     * @throws UndefinedTransformerMethodException
805
     */
806
    private PresenceAbsenceTerm makeProvinceStatus(String areaKey, HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
807
        String statusStr = record.get(areaKey);
808
        if (statusStr == null){
809
            return null;
810
        }
811
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
812
        if (status == null){
813
            UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
814
            status = getPresenceTerm(state, statusUuid, null, null, null, false);
815
        }
816
        return status;
817
    }
818

    
819

    
820
    /**
821
	 *  Stores parent-child, synonym and common name relationships
822
	 */
823
	@Override
824
    protected void secondPass(CubaImportState state) {
825
//		CyprusRow cyprusRow = state.getCyprusRow();
826
		return;
827
	}
828

    
829

    
830
    @Override
831
    protected boolean isIgnore(CubaImportState state) {
832
        return ! state.getConfig().isDoTaxa();
833
    }
834

    
835
    @Override
836
    protected boolean doCheck(CubaImportState state) {
837
        logger.warn("DoCheck not yet implemented for CubaExcelImport");
838
        return true;
839
    }
840

    
841
}
(1-1/5)