Project

General

Profile

Download (25.7 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.iapt;
11

    
12
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
13
import eu.etaxonomy.cdm.common.CdmUtils;
14
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
15
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
16
import eu.etaxonomy.cdm.model.common.*;
17
import eu.etaxonomy.cdm.model.name.*;
18
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
19
import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
20
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
21
import eu.etaxonomy.cdm.model.reference.Reference;
22
import eu.etaxonomy.cdm.model.taxon.*;
23
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
24
import org.apache.commons.lang.ArrayUtils;
25
import org.apache.commons.lang.StringEscapeUtils;
26
import org.apache.commons.lang.StringUtils;
27
import org.apache.log4j.Level;
28
import org.apache.log4j.Logger;
29
import org.springframework.stereotype.Component;
30

    
31
import java.util.*;
32
import java.util.regex.Matcher;
33
import java.util.regex.Pattern;
34

    
35
/**
36
 * @author a.mueller
37
 * @created 05.01.2016
38
 */
39

    
40
@Component("iAPTExcelImport")
41
public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
42
    private static final long serialVersionUID = -747486709409732371L;
43
    private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
44
    public static final String ANNOTATION_MARKER_STRING = "[*]";
45

    
46

    
47
    private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
48

    
49
    private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();
50

    
51
    private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
52
    private final static String HIGHERTAXON= "HigherTaxon";
53
    private final static String FULLNAME= "FullName";
54
    private final static String AUTHORSSPELLING= "AuthorsSpelling";
55
    private final static String LITSTRING= "LitString";
56
    private final static String REGISTRATION= "Registration";
57
    private final static String TYPE= "Type";
58
    private final static String CAVEATS= "Caveats";
59
    private final static String FULLBASIONYM= "FullBasionym";
60
    private final static String FULLSYNSUBST= "FullSynSubst";
61
    private final static String NOTESTXT= "NotesTxt";
62
    private final static String REGDATE= "RegDate";
63
    private final static String NAMESTRING= "NameString";
64
    private final static String BASIONYMSTRING= "BasionymString";
65
    private final static String SYNSUBSTSTR= "SynSubstStr";
66
    private final static String AUTHORSTRING= "AuthorString";
67

    
68
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
69
            REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
70

    
71
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(.*):\\s([^\\.:]+)\\.(.*)$");
72
    private static final Pattern nomRefPubYearExtractP = Pattern.compile("(.*?)(1[7,8,9][0-9]{2}).*$|^.*?[0-9]{1,2}([\\./])[0-1]?[0-9]\\3([0-9]{2})\\.$"); // 1700 - 1999
73
    private static final Pattern typeSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<type>.*?))(?:[Hh]olotype:(?<holotype>.*?))?(?:[Ii]sotype[^:]*:(?<isotype>.*))?$");
74
    enum TypesName {
75
        type, holotype, isotype;
76

    
77
        public SpecimenTypeDesignationStatus status(){
78
            switch (this) {
79
                case holotype:
80
                    return SpecimenTypeDesignationStatus.HOLOTYPE();
81
                case isotype:
82
                    return SpecimenTypeDesignationStatus.ISOTYPE();
83
                default:
84
                    return null;
85
            }
86
        }
87
    }
88

    
89
    private MarkerType markerTypeFossil = null;
90
    private Rank rankUnrankedSupraGeneric = null;
91
    private Rank familyIncertisSedis = null;
92
    private AnnotationType annotationTypeCaveats = null;
93

    
94
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
95
                            TaxonNode higherTaxonNode, boolean isFossil) {
96

    
97
        String line = state.getCurrentLine() + ": ";
98

    
99
        String titleCacheStr = getValue(record, FULLNAME, true);
100
        String nameStr = getValue(record, NAMESTRING, true);
101
        String authorStr = getValue(record, AUTHORSTRING, true);
102
        String nomRefStr = getValue(record, LITSTRING, true);
103
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
104
        String notesTxt = getValue(record, NOTESTXT, true);
105
        String caveats = getValue(record, CAVEATS, true);
106
        String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
107
        String synSubstStr = getValue(record, SYNSUBSTSTR, true);
108
        String typeStr = getValue(record, TYPE, true);
109

    
110
        String nomRefTitle = null;
111
        String nomRefDetail = null;
112
        String nomRefPupDate = null;
113
        String nomRefPupYear = null;
114

    
115
        // preprocess nomRef: separate citation, reference detail, publishing date
116
        if(!StringUtils.isEmpty(nomRefStr)){
117
            nomRefStr = nomRefStr.trim();
118
            Matcher m = nomRefTokenizeP.matcher(nomRefStr);
119
            if(m.matches()){
120
                nomRefTitle = m.group(1);
121
                nomRefDetail = m.group(2);
122
                nomRefPupDate = m.group(3);
123

    
124
                // nomRefDetail.replaceAll("[\\:\\.\\s]", ""); // TODO integrate into nomRefTokenizeP
125
                Matcher m2 = nomRefPubYearExtractP.matcher(nomRefPupDate);
126
                if(m2.matches()){
127
                    nomRefPupYear = m2.group(2);
128
                    if(nomRefPupYear == null){
129
                        nomRefPupYear = m2.group(4);
130
                    }
131
                    if(nomRefPupYear == null){
132
                        logger.error("nomRefPupYear in " + nomRefStr + " is  NULL" );
133
                    }
134
                    if(nomRefPupYear.length() == 2 ){
135
                        // it is an abbreviated year from the 19** years
136
                        nomRefPupYear = "19" + nomRefPupYear;
137
                    }
138
                    nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + nomRefPupYear + ".";
139
                } else {
140
                    logger.warn("Pub year not found in " + nomRefStr );
141
                    // FIXME in in J. Eur. Orchideen 30: 128. 30.09.97 (Vorabdr.).
142

    
143
                }
144

    
145
            } else {
146
                nomRefTitle = nomRefStr;
147
            }
148
        }
149

    
150
        BotanicalName taxonName = makeBotanicalName(state, titleCacheStr, nameStr, authorStr, nomRefTitle);
151

    
152
        if(!StringUtils.isEmpty(notesTxt)){
153
            notesTxt = notesTxt.replace("Notes: ", "").trim();
154
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
155
        }
156
        if(!StringUtils.isEmpty(caveats)){
157
            caveats = caveats.replace("Caveats: ", "").trim();
158
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
159
        }
160
        //
161

    
162
        // Namerelations
163
        if(!StringUtils.isEmpty(authorsSpelling)){
164
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
165

    
166
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
167
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
168

    
169
            ArrayUtils.reverse(authorSpellingTokens);
170
            ArrayUtils.reverse(nameStrTokens);
171

    
172
            for (int i = 0; i < nameStrTokens.length; i++){
173
                if(i < authorSpellingTokens.length){
174
                    nameStrTokens[i] = authorSpellingTokens[i];
175
                }
176
            }
177
            ArrayUtils.reverse(nameStrTokens);
178

    
179
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
180
            // build the fullnameString of the misspelled name
181
            misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
182

    
183
            TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
184
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
185
            getNameService().save(misspelledName);
186
        }
187

    
188
        // Replaced Synonyms
189
        if(!StringUtils.isEmpty(fullSynSubstStr)){
190
            fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
191
            BotanicalName replacedSynonymName = makeBotanicalName(state, fullSynSubstStr, synSubstStr, null, null);
192
            replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
193
            getNameService().save(replacedSynonymName);
194
        }
195

    
196
        Reference sec = state.getConfig().getSecReference();
197
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
198

    
199
        // Markers
200
        if(isFossil){
201
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
202
        }
203

    
204
        // Types
205
        if(!StringUtils.isEmpty(typeStr)){
206
            Matcher m = typeSplitPattern.matcher(typeStr);
207

    
208
            if(m.matches()){
209
                String typeString = m.group(TypesName.type.name());
210
                boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km
211

    
212
                if(isFieldUnit) {
213
                    // type as fieldUnit
214
                    FieldUnit fu = FieldUnit.NewInstance();
215
                    fu.setTitleCache(typeString, true);
216
                    getOccurrenceService().save(fu);
217

    
218
                    // all others ..
219
                    addSpecimenTypes(taxonName, fu, m.group(TypesName.holotype.name()), TypesName.holotype, false);
220
                    addSpecimenTypes(taxonName, fu, m.group(TypesName.isotype.name()), TypesName.isotype, true);
221
                } else {
222
                    TaxonNameBase typeName = nameParser.parseFullName(typeString);
223
                    taxonName.addNameTypeDesignation(typeName, null, null, null, NameTypeDesignationStatus.AUTOMATIC(), true, true, true, true);
224
                }
225
            }
226
            getNameService().save(taxonName);
227

    
228
        }
229

    
230
        getTaxonService().save(taxon);
231
        if(higherTaxonNode != null){
232
            higherTaxonNode.addChildTaxon(taxon, null, null);
233
            getTaxonNodeService().save(higherTaxonNode);
234
        }
235

    
236
        return taxon;
237

    
238
    }
239

    
240
    private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple){
241
        if(StringUtils.isEmpty(typeStr)){
242
            return;
243
        }
244
        typeStr = typeStr.trim().replaceAll("\\.$", "");
245

    
246
        List<String> typeData = new ArrayList<>();
247
        if(multiple){
248
            String[] tokens = typeStr.split("\\s?,\\s?");
249
            for (String t : tokens) {
250
                if(!t.isEmpty()){
251
                    typeData.add(t.trim());
252
                }
253
            }
254
        } else {
255
            typeData.add(typeStr.trim());
256
        }
257

    
258
        for(String type : typeData){
259
            DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.OtherSpecimen, fieldUnit);
260
            facade.setTitleCache(type, true);
261
            DerivedUnit specimen = facade.innerDerivedUnit();
262
            taxonName.addSpecimenTypeDesignation(specimen, typeName.status(), null, null, null, false, true);
263
       }
264
    }
265

    
266
    private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String titleCacheStr, String nameStr, String authorStr, String nomRefTitle) {
267

    
268
        BotanicalName taxonName;// cache field for the taxonName.titleCache
269
        String taxonNameTitleCache = null;
270
        Map<String, AnnotationType> nameAnnotations = new HashMap<>();
271

    
272
        String line = state.getCurrentLine() + ": ";
273

    
274
        // TitleCache preprocessing
275
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
276
            nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
277
            titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
278
            authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
279
        }
280

    
281
        // parse the full taxon name
282
        if(!StringUtils.isEmpty(nomRefTitle)){
283
            String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
284
            String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
285
            logger.debug(":::::" + taxonFullNameStr);
286
            taxonName = (BotanicalName) nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
287
        } else {
288
            taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
289
        }
290

    
291
        taxonNameTitleCache = taxonName.getTitleCache().trim();
292
        if (taxonName.isProtectedTitleCache()) {
293
            logger.warn(line + "Name could not be parsed: " + titleCacheStr);
294
        } else {
295

    
296
            boolean doRestoreTitleCacheStr = false;
297

    
298
            // Check if titleCache and nameCache are plausible
299
            String titleCacheCompareStr = titleCacheStr;
300
            String nameCache = taxonName.getNameCache();
301
            String nameCompareStr = nameStr;
302
            if(taxonName.isBinomHybrid()){
303
                titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
304
                nameCompareStr = nameCompareStr.replace(" x ", " ×");
305
            }
306
            if(taxonName.isMonomHybrid()){
307
                titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
308
                nameCompareStr = nameCompareStr.replace("^X ", "× ");
309
            }
310
            if(authorStr != null && authorStr.contains(" et ")){
311
                titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
312
            }
313
            if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
314
                logger.warn(line + "The generated titleCache differs from the imported string : " + taxonNameTitleCache + " <> " + titleCacheStr + " will restore original titleCacheStr");
315
                doRestoreTitleCacheStr = true;
316
            }
317
            if (!nameCache.trim().equals(nameCompareStr)) {
318
                logger.warn(line + "The parsed nameCache differs from " + NAMESTRING + " : " + nameCache + " <> " + nameCompareStr);
319
            }
320

    
321
            //  Author
322
            //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
323
            //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
324
            //    logger.warn(line + "titleCache has changed after setting authors, will restore original titleCacheStr");
325
            //    doRestoreTitleCacheStr = true;
326
            //}
327

    
328
            if(doRestoreTitleCacheStr){
329
                taxonName.setTitleCache(titleCacheStr, true);
330
            }
331

    
332
            // deduplicate
333
            replaceAuthorNamesAndNomRef(state, taxonName);
334
        }
335

    
336
        // Annotations
337
        if(!nameAnnotations.isEmpty()){
338
            for(String text : nameAnnotations.keySet()){
339
                taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
340
            }
341
            getNameService().save(taxonName);
342
        }
343
        return taxonName;
344
    }
345

    
346
    /**
347
     * @param state
348
     * @return
349
     */
350
    private TaxonNode getClassificationRootNode(IAPTImportState state) {
351

    
352
     //   Classification classification = state.getClassification();
353
     //   if (classification == null){
354
     //       IAPTImportConfigurator config = state.getConfig();
355
     //       classification = Classification.NewInstance(state.getConfig().getClassificationName());
356
     //       classification.setUuid(config.getClassificationUuid());
357
     //       classification.setReference(config.getSecReference());
358
     //       classification = getClassificationService().find(state.getConfig().getClassificationUuid());
359
     //   }
360
        TaxonNode rootNode = state.getRootNode();
361
        if (rootNode == null){
362
            rootNode = getTaxonNodeService().find(ROOT_UUID);
363
        }
364
        if (rootNode == null){
365
            Classification classification = state.getClassification();
366
            if (classification == null){
367
                Reference sec = state.getSecReference();
368
                String classificationName = state.getConfig().getClassificationName();
369
                Language language = Language.DEFAULT();
370
                classification = Classification.NewInstance(classificationName, sec, language);
371
                state.setClassification(classification);
372
                classification.setUuid(state.getConfig().getClassificationUuid());
373
                classification.getRootNode().setUuid(ROOT_UUID);
374
                getClassificationService().save(classification);
375
            }
376
            rootNode = classification.getRootNode();
377
            state.setRootNode(rootNode);
378
        }
379
        return rootNode;
380
    }
381

    
382

    
383
    /**
384
     * @param record
385
     * @param originalKey
386
     * @param doUnescapeHtmlEntities
387
     * @return
388
     */
389
    private String getValue(HashMap<String, String> record, String originalKey, boolean doUnescapeHtmlEntities) {
390
        String value = record.get(originalKey);
391

    
392
        value = fixCharacters(value);
393

    
394
        if (! StringUtils.isBlank(value)) {
395
        	if (logger.isDebugEnabled()) {
396
        	    logger.debug(originalKey + ": " + value);
397
        	}
398
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
399
            if(doUnescapeHtmlEntities){
400
                value = StringEscapeUtils.unescapeHtml(value);
401
            }
402
        	return value.trim();
403
        }else{
404
        	return null;
405
        }
406
    }
407

    
408
    /**
409
     * Fixes broken characters.
410
     * For details see
411
     * http://dev.e-taxonomy.eu/redmine/issues/6035
412
     *
413
     * @param value
414
     * @return
415
     */
416
    private String fixCharacters(String value) {
417

    
418
        value = StringUtils.replace(value, "s$K", "š");
419
        value = StringUtils.replace(value, "n$K", "ň");
420
        value = StringUtils.replace(value, "e$K", "ě");
421
        value = StringUtils.replace(value, "r$K", "ř");
422
        value = StringUtils.replace(value, "c$K", "č");
423
        value = StringUtils.replace(value, "z$K", "ž");
424
        value = StringUtils.replace(value, "S>U$K", "Š");
425
        value = StringUtils.replace(value, "C>U$K", "Č");
426
        value = StringUtils.replace(value, "R>U$K", "Ř");
427
        value = StringUtils.replace(value, "Z>U$K", "Ž");
428
        value = StringUtils.replace(value, "g$K", "ǧ");
429
        value = StringUtils.replace(value, "s$A", "ś");
430
        value = StringUtils.replace(value, "n$A", "ń");
431
        value = StringUtils.replace(value, "c$A", "ć");
432
        value = StringUtils.replace(value, "e$E", "ę");
433
        value = StringUtils.replace(value, "o$H", "õ");
434
        value = StringUtils.replace(value, "s$C", "ş");
435
        value = StringUtils.replace(value, "t$C", "ț");
436
        value = StringUtils.replace(value, "S>U$C", "Ş");
437
        value = StringUtils.replace(value, "a$O", "å");
438
        value = StringUtils.replace(value, "A>U$O", "Å");
439
        value = StringUtils.replace(value, "u$O", "ů");
440
        value = StringUtils.replace(value, "g$B", "ğ");
441
        value = StringUtils.replace(value, "g$B", "ĕ");
442
        value = StringUtils.replace(value, "a$B", "ă");
443
        value = StringUtils.replace(value, "l$/", "ł");
444
        value = StringUtils.replace(value, ">i", "ı");
445
        value = StringUtils.replace(value, "i$U", "ï");
446
        // Special-cases
447
        value = StringUtils.replace(value, "&yacute", "ý");
448
        value = StringUtils.replace(value, "<L", "Ł");
449
        value = StringUtils.replace(value, "E>U$D", "З");
450
        value = StringUtils.replace(value, "S>U$E", "Ş");
451
        value = StringUtils.replace(value, "s$E", "ş");
452

    
453
        return value;
454
    }
455

    
456

    
457
    /**
458
	 *  Stores taxa records in DB
459
	 */
460
	@Override
461
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
462

    
463
        String lineNumber = state.getCurrentLine() + ": ";
464
        logger.setLevel(Level.DEBUG);
465
        HashMap<String, String> record = state.getOriginalRecord();
466
        logger.debug(lineNumber + record.toString());
467

    
468
        Set<String> keys = record.keySet();
469
        for (String key: keys) {
470
            if (! expectedKeys.contains(key)){
471
                logger.warn(lineNumber + "Unexpected Key: " + key);
472
            }
473
        }
474

    
475
        String reg_id = record.get(REGISTRATIONNO_PK);
476

    
477
        //higherTaxon
478
        String higherTaxaString = record.get(HIGHERTAXON);
479
        boolean isFossil = false;
480
        if(higherTaxaString.startsWith("FOSSIL ")){
481
            higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
482
            isFossil = true;
483
        }
484
        TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
485

    
486
       //Taxon
487
        Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
488
        if (taxon == null){
489
            logger.warn(lineNumber + "taxon could not be created and is null");
490
            return;
491
        }
492
        ((IAPTImportState)state).setCurrentTaxon(taxon);
493

    
494

    
495
		return;
496
    }
497

    
498
    private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
499
        String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
500
        TaxonNode higherTaxonNode = null;
501

    
502
        ITaxonTreeNode rootNode = getClassificationRootNode(state);
503
        for (String htn :  higherTaxaNames) {
504
            htn = StringUtils.capitalize(htn.trim());
505
            Taxon higherTaxon = state.getHigherTaxon(htn);
506
            if (higherTaxon != null){
507
                higherTaxonNode = higherTaxon.getTaxonNodes().iterator().next();
508
            }else{
509
                BotanicalName name = makeHigherTaxonName(state, htn);
510
                Reference sec = state.getSecReference();
511
                higherTaxon = Taxon.NewInstance(name, sec);
512
                getTaxonService().save(higherTaxon);
513
                higherTaxonNode = rootNode.addChildTaxon(higherTaxon, sec, null);
514
                state.putHigherTaxon(htn, higherTaxon);
515
                getClassificationService().saveTreeNode(higherTaxonNode);
516
            }
517
            rootNode = higherTaxonNode;
518
        }
519
        return higherTaxonNode;
520
    }
521

    
522
    private BotanicalName makeHigherTaxonName(IAPTImportState state, String name) {
523

    
524
        Rank rank = guessRank(name);
525

    
526
        BotanicalName taxonName = BotanicalName.NewInstance(rank);
527
        taxonName.addSource(makeOriginalSource(state));
528
        taxonName.setGenusOrUninomial(StringUtils.capitalize(name));
529
        return taxonName;
530
    }
531

    
532
    private Rank guessRank(String name) {
533

    
534
        // normalize
535
        name = name.replaceAll("\\(.*\\)", "").trim();
536

    
537
        if(name.matches("^Plantae$|^Fungi$")){
538
           return Rank.KINGDOM();
539
        } else if(name.matches("^Incertae sedis$|^No group assigned$")){
540
           return rankFamilyIncertisSedis();
541
        } else if(name.matches(".*phyta$|.*mycota$")){
542
           return Rank.SECTION_BOTANY();
543
        } else if(name.matches(".*phytina$|.*mycotina$")){
544
           return Rank.SUBSECTION_BOTANY();
545
        } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
546
            return rankUnrankedSupraGeneric();
547
        } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
548
           return Rank.CLASS();
549
        } else if(name.matches(".*idae$|.*phycidae$|.*mycetidae$")){
550
           return Rank.SUBCLASS();
551
        } else if(name.matches(".*ales$")){
552
           return Rank.ORDER();
553
        } else if(name.matches(".*ineae$")){
554
           return Rank.SUBORDER();
555
        } else if(name.matches(".*aceae$")){
556
            return Rank.FAMILY();
557
        } else if(name.matches(".*oideae$")){
558
           return Rank.SUBFAMILY();
559
        } else
560
        //    if(name.matches(".*eae$")){
561
        //    return Rank.TRIBE();
562
        // } else
563
            if(name.matches(".*inae$")){
564
           return Rank.SUBTRIBE();
565
        } else if(name.matches(".*ae$")){
566
           return Rank.FAMILY();
567
        }
568
        return Rank.UNKNOWN_RANK();
569
    }
570

    
571
    private Rank rankUnrankedSupraGeneric() {
572

    
573
        if(rankUnrankedSupraGeneric == null){
574
            rankUnrankedSupraGeneric = Rank.NewInstance(RankClass.Suprageneric, "Unranked supra generic", " ", " ");
575
            getTermService().save(rankUnrankedSupraGeneric);
576
        }
577
        return rankUnrankedSupraGeneric;
578
    }
579

    
580
    private Rank rankFamilyIncertisSedis() {
581

    
582
        if(familyIncertisSedis == null){
583
            familyIncertisSedis = Rank.NewInstance(RankClass.Suprageneric, "Family incertis sedis", " ", " ");
584
            getTermService().save(familyIncertisSedis);
585
        }
586
        return familyIncertisSedis;
587
    }
588

    
589
    private AnnotationType annotationTypeCaveats(){
590
        if(annotationTypeCaveats == null){
591
            annotationTypeCaveats = AnnotationType.NewInstance("Caveats", "Caveats", "");
592
            getTermService().save(annotationTypeCaveats);
593
        }
594
        return annotationTypeCaveats;
595
    }
596

    
597

    
598
    /**
599
     * @param state
600
     * @return
601
     */
602
    private IdentifiableSource makeOriginalSource(IAPTImportState state) {
603
        return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
604
    }
605

    
606

    
607
    private Reference makeReference(IAPTImportState state, UUID uuidRef) {
608
        Reference ref = state.getReference(uuidRef);
609
        if (ref == null){
610
            ref = getReferenceService().find(uuidRef);
611
            state.putReference(uuidRef, ref);
612
        }
613
        return ref;
614
    }
615

    
616
    private MarkerType markerTypeFossil(){
617
        if(this.markerTypeFossil == null){
618
            markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
619
            getTermService().save(this.markerTypeFossil);
620
        }
621
        return markerTypeFossil;
622
    }
623

    
624

    
625
}
(1-1/4)