Project

General

Profile

Download (45.8 KB) Statistics
| Branch: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.iapt;
11

    
12
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
13
import eu.etaxonomy.cdm.common.CdmUtils;
14
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
15
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
16
import eu.etaxonomy.cdm.model.agent.Institution;
17
import eu.etaxonomy.cdm.model.agent.Person;
18
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
19
import eu.etaxonomy.cdm.model.common.*;
20
import eu.etaxonomy.cdm.model.name.*;
21
import eu.etaxonomy.cdm.model.occurrence.*;
22
import eu.etaxonomy.cdm.model.occurrence.Collection;
23
import eu.etaxonomy.cdm.model.reference.Reference;
24
import eu.etaxonomy.cdm.model.reference.ReferenceType;
25
import eu.etaxonomy.cdm.model.taxon.*;
26
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
27
import org.apache.commons.lang.ArrayUtils;
28
import org.apache.commons.lang.StringEscapeUtils;
29
import org.apache.commons.lang.StringUtils;
30
import org.apache.log4j.Level;
31
import org.apache.log4j.Logger;
32
import org.joda.time.DateTimeFieldType;
33
import org.joda.time.Partial;
34
import org.joda.time.format.DateTimeFormat;
35
import org.joda.time.format.DateTimeFormatter;
36
import org.springframework.stereotype.Component;
37

    
38
import java.util.*;
39
import java.util.regex.Matcher;
40
import java.util.regex.Pattern;
41

    
42
/**
43
 * @author a.mueller
44
 * @created 05.01.2016
45
 */
46

    
47
@Component("iAPTExcelImport")
48
public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
49
    private static final long serialVersionUID = -747486709409732371L;
50
    private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
51
    public static final String ANNOTATION_MARKER_STRING = "[*]";
52

    
53

    
54
    private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
55

    
56
    private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();
57

    
58
    private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
59
    private final static String HIGHERTAXON= "HigherTaxon";
60
    private final static String FULLNAME= "FullName";
61
    private final static String AUTHORSSPELLING= "AuthorsSpelling";
62
    private final static String LITSTRING= "LitString";
63
    private final static String REGISTRATION= "Registration";
64
    private final static String TYPE= "Type";
65
    private final static String CAVEATS= "Caveats";
66
    private final static String FULLBASIONYM= "FullBasionym";
67
    private final static String FULLSYNSUBST= "FullSynSubst";
68
    private final static String NOTESTXT= "NotesTxt";
69
    private final static String REGDATE= "RegDate";
70
    private final static String NAMESTRING= "NameString";
71
    private final static String BASIONYMSTRING= "BasionymString";
72
    private final static String SYNSUBSTSTR= "SynSubstStr";
73
    private final static String AUTHORSTRING= "AuthorString";
74

    
75
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
76
            REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
77

    
78
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)\\.?$");
79
    private static final Pattern[] datePatterns = new Pattern[]{
80
            // NOTE:
81
            // The order of the patterns is extremely important!!!
82
            //
83
            // all patterns cover the years 1700 - 1999
84
            Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
85
            Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
86
            Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
87
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
88
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
89
            Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
90
            Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
91
            Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),//  partial date like 1999-04
92
            Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
93
            Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
94
        };
95
    private static final Pattern typeSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
96

    
97
    private static final Pattern collectorPattern =  Pattern.compile(".*?\\(leg\\.\\s+([^\\)]*)\\)|.*?\\sleg\\.\\s+(.*?)\\.?$");
98
    private static final Pattern collectionDataPattern =  Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
99
    private static final Pattern collectorsNumber =  Pattern.compile("^([nN]o\\.\\s.*)$");
100

    
101
    // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
102
    private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");
103

    
104
    private static final Pattern[] specimenTypePatterns = new Pattern[]{
105
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
106
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
107
            Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
108
            Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
109
    };
110

    
111
    /**
     * Maps lower case month names, month abbreviations and roman numerals to the month number (1 - 12).
     * All keys are stored trimmed and in lower case, because lookups are made with lower cased names.
     */
    private static Map<String, Integer> monthFromNameMap = new HashMap<>();

    static {
        String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
        String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
        String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
        String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
        String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
        String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
        String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
        String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
        String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
        String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};

        String[][] perLang =  new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};

        for (String[] months: perLang) {
            for(int m = 1; m <= months.length; m++){
                // trim() removes accidental whitespace (see "červenec " above) which could never match a
                // looked up name; Locale.ROOT keeps the lower casing independent of the platform locale
                monthFromNameMap.put(months[m - 1].trim().toLowerCase(java.util.Locale.ROOT), m);
            }
        }

        // special cases (keys must be lower case, otherwise they are never found)
        monthFromNameMap.put("mar", 3);
        monthFromNameMap.put("dec", 12);
        monthFromNameMap.put("februari", 2);
    }
138

    
139

    
140
    DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");
141

    
142
    private Map<String, Collection> collectionMap = new HashMap<>();
143

    
144

    
145
    enum TypesName {
146
        fieldUnit, holotype, isotype;
147

    
148
        public SpecimenTypeDesignationStatus status(){
149
            switch (this) {
150
                case holotype:
151
                    return SpecimenTypeDesignationStatus.HOLOTYPE();
152
                case isotype:
153
                    return SpecimenTypeDesignationStatus.ISOTYPE();
154
                default:
155
                    return null;
156
            }
157
        }
158
    }
159

    
160
    private MarkerType markerTypeFossil = null;
161
    private Rank rankUnrankedSupraGeneric = null;
162
    private Rank familyIncertisSedis = null;
163
    private AnnotationType annotationTypeCaveats = null;
164

    
165
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
166
                            TaxonNode higherTaxonNode, boolean isFossil) {
167

    
168
        String regNumber = getValue(record, REGISTRATIONNO_PK, false);
169
        String regStr = getValue(record, REGISTRATION, true);
170
        String titleCacheStr = getValue(record, FULLNAME, true);
171
        String nameStr = getValue(record, NAMESTRING, true);
172
        String authorStr = getValue(record, AUTHORSTRING, true);
173
        String nomRefStr = getValue(record, LITSTRING, true);
174
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
175
        String notesTxt = getValue(record, NOTESTXT, true);
176
        String caveats = getValue(record, CAVEATS, true);
177
        String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
178
        String fullBasionymStr = getValue(record, FULLBASIONYM, true);
179
        String basionymNameStr = getValue(record, FULLBASIONYM, true);
180
        String synSubstStr = getValue(record, SYNSUBSTSTR, true);
181
        String typeStr = getValue(record, TYPE, true);
182

    
183

    
184
        String nomRefTitle = null;
185
        String nomRefDetail;
186
        String nomRefPupDate = null;
187
        String nomRefIssue = null;
188
        Partial pupDate = null;
189

    
190
        // preprocess nomRef: separate citation, reference detail, publishing date
191
        if(!StringUtils.isEmpty(nomRefStr)){
192
            nomRefStr = nomRefStr.trim();
193
            Matcher m = nomRefTokenizeP.matcher(nomRefStr);
194
            if(m.matches()){
195
                nomRefTitle = m.group("title");
196
                nomRefDetail = m.group("detail");
197
                nomRefPupDate = m.group("date").trim();
198
                nomRefIssue = m.group("issue");
199

    
200
                pupDate = parseDate(regNumber, nomRefPupDate);
201
                if (pupDate != null) {
202
                    nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
203
                } else {
204
                    logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
205
                }
206
            } else {
207
                nomRefTitle = nomRefStr;
208
            }
209
        }
210

    
211
        BotanicalName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);
212

    
213
        // always add the original strings of parsed data as annotation
214
        taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
215
                        "\n -  '" + LITSTRING + "': "+ nomRefStr +
216
                        "\n -  '" + TYPE + "': " + typeStr +
217
                        "\n -  '" + REGISTRATION  + "': " + regStr
218
                , AnnotationType.TECHNICAL(), Language.DEFAULT()));
219

    
220
        if(pupDate != null) {
221
            taxonName.getNomenclaturalReference().setDatePublished(TimePeriod.NewInstance(pupDate));
222
        }
223
        if(nomRefIssue != null) {
224
            taxonName.getNomenclaturalReference().setType(ReferenceType.Book);
225
            ((Reference)taxonName.getNomenclaturalReference()).setVolume(nomRefIssue);
226
        }
227

    
228
        if(!StringUtils.isEmpty(notesTxt)){
229
            notesTxt = notesTxt.replace("Notes: ", "").trim();
230
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
231
        }
232
        if(!StringUtils.isEmpty(caveats)){
233
            caveats = caveats.replace("Caveats: ", "").trim();
234
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
235
        }
236

    
237
        getNameService().save(taxonName);
238

    
239
        // Namerelations
240
        if(!StringUtils.isEmpty(authorsSpelling)){
241
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
242

    
243
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
244
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
245

    
246
            ArrayUtils.reverse(authorSpellingTokens);
247
            ArrayUtils.reverse(nameStrTokens);
248

    
249
            for (int i = 0; i < nameStrTokens.length; i++){
250
                if(i < authorSpellingTokens.length){
251
                    nameStrTokens[i] = authorSpellingTokens[i];
252
                }
253
            }
254
            ArrayUtils.reverse(nameStrTokens);
255

    
256
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
257
            // build the fullnameString of the misspelled name
258
            misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
259

    
260
            TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
261
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
262
            getNameService().save(misspelledName);
263
        }
264

    
265
        // Replaced Synonyms
266
        if(!StringUtils.isEmpty(fullSynSubstStr)){
267
            fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
268
            BotanicalName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
269
            replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
270
            getNameService().save(replacedSynonymName);
271
        }
272

    
273
        Reference sec = state.getConfig().getSecReference();
274
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
275

    
276
        // Basionym
277
        if(fullBasionymStr != null){
278
            fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
279
            BotanicalName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
280
            getNameService().save(basionym);
281
            taxonName.addBasionym(basionym);
282

    
283
            Synonym syn = Synonym.NewInstance(basionym, sec);
284
            taxon.addSynonym(syn, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF());
285
            getTaxonService().save(syn);
286
        }
287

    
288
        // Markers
289
        if(isFossil){
290
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
291
        }
292

    
293
        // Types
294
        if(!StringUtils.isEmpty(typeStr)){
295
            makeTypeData(typeStr, taxonName, regNumber, state);
296
        }
297

    
298
        getTaxonService().save(taxon);
299
        if(higherTaxonNode != null){
300
            higherTaxonNode.addChildTaxon(taxon, null, null);
301
            getTaxonNodeService().save(higherTaxonNode);
302
        }
303

    
304
        return taxon;
305

    
306
    }
307

    
308
    private void makeTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
309

    
310
        Matcher m = typeSplitPattern.matcher(typeStr);
311

    
312
        if(m.matches()){
313
            String fieldUnitStr = m.group(TypesName.fieldUnit.name());
314
            // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
315
            FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
316
            if(fieldUnit == null) {
317
                // create a field unit with only a titleCache using the fieldUnitStr substring
318
                logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
319
                fieldUnit = FieldUnit.NewInstance();
320
                fieldUnit.setTitleCache(fieldUnitStr, true);
321
                getOccurrenceService().save(fieldUnit);
322
            }
323
            getOccurrenceService().save(fieldUnit);
324

    
325
            // all others ..
326
            addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber);
327
            addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber);
328

    
329
        } else {
330
            // create a field unit with only a titleCache using the full typeStr
331
            FieldUnit fieldUnit = FieldUnit.NewInstance();
332
            fieldUnit.setTitleCache(typeStr, true);
333
            getOccurrenceService().save(fieldUnit);
334
            logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
335
        }
336
        getNameService().save(taxonName);
337
    }
338

    
339
    /**
340
     * Currently only parses the collector, fieldNumber and the collection date.
341
     *
342
     * @param fieldUnitStr
343
     * @param regNumber
344
     * @param state
345
     * @return null if the fieldUnitStr could not be parsed
346
     */
347
    private FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
348

    
349
        FieldUnit fieldUnit = null;
350

    
351
        Matcher m1 = collectorPattern.matcher(fieldUnitStr);
352
        if(m1.matches()){
353
            String collectionData = m1.group(1); // like (leg. Metzeltin, 30. 9. 1996)
354
            if(collectionData == null){
355
                collectionData = m1.group(2); // like leg. Metzeltin, 30. 9. 1996
356
            }
357
            if(collectionData == null){
358
                return null;
359
            }
360

    
361
            String collectorStr = null;
362
            String detailStr = null;
363
            Partial date = null;
364
            String fieldNumber = null;
365

    
366
            Matcher m2 = collectionDataPattern.matcher(collectionData);
367
            if(m2.matches()){
368
                collectorStr = m2.group("collector");
369
                detailStr = m2.group("detail");
370

    
371
                // Try to make sense of the detailStr
372
                if(detailStr != null){
373
                    detailStr = detailStr.trim();
374
                    // 1. try to parse as date
375
                    date = parseDate(regNumber, detailStr);
376
                    if(date == null){
377
                        // 2. try to parse as number
378
                        if(collectorsNumber.matcher(detailStr).matches()){
379
                            fieldNumber = detailStr;
380
                        }
381
                    }
382
                }
383
                if(date == null && fieldNumber == null){
384
                    // detailed parsing not possible, so need fo fallback
385
                    collectorStr = collectionData;
386
                }
387
            }
388

    
389
            if(collectorStr != null) {
390
                fieldUnit = FieldUnit.NewInstance();
391
                GatheringEvent ge = GatheringEvent.NewInstance();
392

    
393
                TeamOrPersonBase agent =  state.getAgentBase(collectorStr);
394
                if(agent == null) {
395
                    agent = Person.NewTitledInstance(collectorStr);
396
                    getAgentService().save(agent);
397
                    state.putAgentBase(collectorStr, agent);
398
                }
399
                ge.setCollector(agent);
400

    
401
                if(date != null){
402
                    ge.setGatheringDate(date);
403
                }
404

    
405
                getEventBaseService().save(ge);
406
                fieldUnit.setGatheringEvent(ge);
407

    
408
                if(fieldNumber != null) {
409
                    fieldUnit.setFieldNumber(fieldNumber);
410
                }
411
                getOccurrenceService().save(fieldUnit);
412
            }
413
        }
414

    
415
        return fieldUnit;
416
    }
417

    
418
    private Partial parseDate(String regNumber, String dateStr) {
419

    
420
        Partial pupDate = null;
421
        boolean parseError = false;
422

    
423
        String day = null;
424
        String month = null;
425
        String monthName = null;
426
        String year = null;
427

    
428
        for(Pattern p : datePatterns){
429
            Matcher m2 = p.matcher(dateStr);
430
            if(m2.matches()){
431
                try {
432
                    year = m2.group("year");
433
                } catch (IllegalArgumentException e){
434
                    // named capture group not found
435
                }
436
                try {
437
                    month = m2.group("month");
438
                } catch (IllegalArgumentException e){
439
                    // named capture group not found
440
                }
441

    
442
                try {
443
                    monthName = m2.group("monthName");
444
                    month = monthFromName(monthName, regNumber);
445
                    if(month == null){
446
                        parseError = true;
447
                    }
448
                } catch (IllegalArgumentException e){
449
                    // named capture group not found
450
                }
451
                try {
452
                    day = m2.group("day");
453
                } catch (IllegalArgumentException e){
454
                    // named capture group not found
455
                }
456

    
457
                if(year != null){
458
                    if (year.length() == 2) {
459
                        // it is an abbreviated year from the 19** years
460
                        year = "19" + year;
461
                    }
462
                    break;
463
                } else {
464
                    parseError = true;
465
                }
466
            }
467
        }
468
        if(year == null){
469
            parseError = true;
470
        }
471
        List<DateTimeFieldType> types = new ArrayList<>();
472
        List<Integer> values = new ArrayList<>();
473
        if(!parseError) {
474
            types.add(DateTimeFieldType.year());
475
            values.add(Integer.parseInt(year));
476
            if (month != null) {
477
                types.add(DateTimeFieldType.monthOfYear());
478
                values.add(Integer.parseInt(month));
479
            }
480
            if (day != null) {
481
                types.add(DateTimeFieldType.dayOfMonth());
482
                values.add(Integer.parseInt(day));
483
            }
484
            pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
485
        }
486
        return pupDate;
487
    }
488

    
489
    private String monthFromName(String monthName, String regNumber) {
490

    
491
        Integer month = monthFromNameMap.get(monthName.toLowerCase());
492
        if(month == null){
493
            logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
494
            return null;
495
        } else {
496
            return month.toString();
497
        }
498
    }
499

    
500

    
501
    private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber){
502

    
503
        if(StringUtils.isEmpty(typeStr)){
504
            return;
505
        }
506
        typeStr = typeStr.trim().replaceAll("\\.$", "");
507

    
508
        Collection collection = null;
509
        DerivedUnit specimen = null;
510

    
511
        List<DerivedUnit> specimens = new ArrayList<>();
512
        if(multiple){
513
            String[] tokens = typeStr.split("\\s?,\\s?");
514
            for (String t : tokens) {
515
                // command to  list all complex parsabel types:
516
                // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
517
                // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
518

    
519
                if(!t.isEmpty()){
520
                    // trying to parse the string
521
                    specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
522
                    if(specimen != null){
523
                        specimens.add(specimen);
524
                    } else {
525
                        // parsing was not successful make simple specimen
526
                        specimens.add(makeSpecimenType(fieldUnit, t));
527
                    }
528
                }
529
            }
530
        } else {
531
            specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
532
            if(specimen != null) {
533
                specimens.add(specimen);
534
                // remember current collection
535
                collection = specimen.getCollection();
536
            } else {
537
                // parsing was not successful make simple specimen
538
                specimens.add(makeSpecimenType(fieldUnit, typeStr));
539
            }
540
        }
541

    
542
        for(DerivedUnit s : specimens){
543
            taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
544
       }
545
    }
546

    
547
    private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache) {
548
        DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
549
        facade.setTitleCache(titleCache.trim(), true);
550
        specimen = facade.innerDerivedUnit();
551
        return specimen;
552
    }
553

    
554
    /**
555
     *
556
     * @param fieldUnit
557
     * @param typeName
558
     * @param collection
559
     * @param text
560
     * @param regNumber
561
     * @return
562
     */
563
    private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
564

    
565
        DerivedUnit specimen = null;
566

    
567
        String collectionCode = null;
568
        String subCollectionStr = null;
569
        String instituteStr = null;
570
        String accessionNumber = null;
571

    
572
        boolean unusualAccessionNumber = false;
573

    
574
        text = text.trim();
575

    
576
        // 1.  For Isotypes often the accession number is noted alone if the
577
        //     preceeding entry has a collection code.
578
        if(typeName .equals(TypesName.isotype) && collection != null){
579
            Matcher m = accessionNumberOnlyPattern.matcher(text);
580
            if(m.matches()){
581
                try {
582
                    accessionNumber = m.group("accNumber");
583
                    specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
584
                } catch (IllegalArgumentException e){
585
                    // match group acc_number not found
586
                }
587
            }
588
        }
589

    
590
        //2. try it the 'normal' way
591
        if(specimen == null) {
592
            for (Pattern p : specimenTypePatterns) {
593
                Matcher m = p.matcher(text);
594
                if (m.matches()) {
595
                    // collection code is mandatory
596
                    try {
597
                        collectionCode = m.group("colCode");
598
                    } catch (IllegalArgumentException e){
599
                        // match group colCode not found
600
                    }
601
                    try {
602
                        subCollectionStr = m.group("subCollection");
603
                    } catch (IllegalArgumentException e){
604
                        // match group subCollection not found
605
                    }
606
                    try {
607
                        instituteStr = m.group("institute");
608
                    } catch (IllegalArgumentException e){
609
                        // match group col_name not found
610
                    }
611
                    try {
612
                        accessionNumber = m.group("accNumber");
613

    
614
                        // try to improve the accessionNumber
615
                        if(accessionNumber!= null) {
616
                            accessionNumber = accessionNumber.trim();
617
                            Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
618
                            String betterAccessionNumber = null;
619
                            if (m2.matches()) {
620
                                try {
621
                                    betterAccessionNumber = m.group("accNumber");
622
                                } catch (IllegalArgumentException e) {
623
                                    // match group acc_number not found
624
                                }
625
                            }
626
                            if (betterAccessionNumber != null) {
627
                                accessionNumber = betterAccessionNumber;
628
                            } else {
629
                                unusualAccessionNumber = true;
630
                            }
631
                        }
632

    
633
                    } catch (IllegalArgumentException e){
634
                        // match group acc_number not found
635
                    }
636

    
637
                    if(collectionCode == null && instituteStr == null){
638
                        logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
639
                        continue;
640
                    }
641
                    collection = getCollection(collectionCode, instituteStr, subCollectionStr);
642
                    specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
643
                    break;
644
                }
645
            }
646
        }
647
        if(specimen == null) {
648
            logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
649
        }
650
        if(unusualAccessionNumber){
651
            logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
652
        }
653
        return specimen;
654
    }
655

    
656
    private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
657

    
658
        DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
659
        facade.setCollection(collection);
660
        if(accessionNumber != null){
661
            facade.setAccessionNumber(accessionNumber);
662
        }
663
        return facade.innerDerivedUnit();
664
    }
665

    
666
    private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
667
                                            String authorStr, String nomRefTitle) {
668

    
669
        BotanicalName taxonName;// cache field for the taxonName.titleCache
670
        String taxonNameTitleCache = null;
671
        Map<String, AnnotationType> nameAnnotations = new HashMap<>();
672

    
673
        // TitleCache preprocessing
674
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
675
            nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
676
            titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
677
            if(authorStr != null) {
678
                authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
679
            }
680
        }
681

    
682
        // parse the full taxon name
683
        if(!StringUtils.isEmpty(nomRefTitle)){
684
            String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
685
            String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
686
            logger.debug(":::::" + taxonFullNameStr);
687
            taxonName = (BotanicalName) nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
688
        } else {
689
            taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
690
        }
691

    
692
        taxonNameTitleCache = taxonName.getTitleCache().trim();
693
        if (taxonName.isProtectedTitleCache()) {
694
            logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
695
        } else {
696

    
697
            boolean doRestoreTitleCacheStr = false;
698

    
699
            // Check if titleCache and nameCache are plausible
700
            String titleCacheCompareStr = titleCacheStr;
701
            String nameCache = taxonName.getNameCache();
702
            String nameCompareStr = nameStr;
703
            if(taxonName.isBinomHybrid()){
704
                titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
705
                nameCompareStr = nameCompareStr.replace(" x ", " ×");
706
            }
707
            if(taxonName.isMonomHybrid()){
708
                titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
709
                nameCompareStr = nameCompareStr.replace("^X ", "× ");
710
            }
711
            if(authorStr != null && authorStr.contains(" et ")){
712
                titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
713
            }
714
            if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
715
                logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
716
                doRestoreTitleCacheStr = true;
717
            }
718
            if (!nameCache.trim().equals(nameCompareStr)) {
719
                logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
720
            }
721

    
722
            //  Author
723
            //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
724
            //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
725
            //    logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
726
            //    doRestoreTitleCacheStr = true;
727
            //}
728

    
729
            if(doRestoreTitleCacheStr){
730
                taxonName.setTitleCache(titleCacheStr, true);
731
            }
732

    
733
            // deduplicate
734
            replaceAuthorNamesAndNomRef(state, taxonName);
735
        }
736

    
737
        // Annotations
738
        if(!nameAnnotations.isEmpty()){
739
            for(String text : nameAnnotations.keySet()){
740
                taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
741
            }
742
            getNameService().save(taxonName);
743
        }
744
        return taxonName;
745
    }
746

    
747
    /**
748
     * @param state
749
     * @return
750
     */
751
    private TaxonNode getClassificationRootNode(IAPTImportState state) {
752

    
753
     //   Classification classification = state.getClassification();
754
     //   if (classification == null){
755
     //       IAPTImportConfigurator config = state.getConfig();
756
     //       classification = Classification.NewInstance(state.getConfig().getClassificationName());
757
     //       classification.setUuid(config.getClassificationUuid());
758
     //       classification.setReference(config.getSecReference());
759
     //       classification = getClassificationService().find(state.getConfig().getClassificationUuid());
760
     //   }
761
        TaxonNode rootNode = state.getRootNode();
762
        if (rootNode == null){
763
            rootNode = getTaxonNodeService().find(ROOT_UUID);
764
        }
765
        if (rootNode == null){
766
            Classification classification = state.getClassification();
767
            if (classification == null){
768
                Reference sec = state.getSecReference();
769
                String classificationName = state.getConfig().getClassificationName();
770
                Language language = Language.DEFAULT();
771
                classification = Classification.NewInstance(classificationName, sec, language);
772
                state.setClassification(classification);
773
                classification.setUuid(state.getConfig().getClassificationUuid());
774
                classification.getRootNode().setUuid(ROOT_UUID);
775
                getClassificationService().save(classification);
776
            }
777
            rootNode = classification.getRootNode();
778
            state.setRootNode(rootNode);
779
        }
780
        return rootNode;
781
    }
782

    
783
    private Collection getCollection(String collectionCode, String instituteStr, String subCollectionStr){
784

    
785
        Collection superCollection = null;
786
        if(subCollectionStr != null){
787
            superCollection = getCollection(collectionCode, instituteStr, null);
788
            collectionCode = subCollectionStr;
789
            instituteStr = null;
790
        }
791

    
792
        final String key = collectionCode + "-#i:" + StringUtils.defaultString(instituteStr);
793

    
794
        Collection collection = collectionMap.get(key);
795

    
796
        if(collection == null) {
797
            collection = Collection.NewInstance();
798
            collection.setCode(collectionCode);
799
            if(instituteStr != null){
800
                collection.setInstitute(Institution.NewNamedInstance(instituteStr));
801
            }
802
            if(superCollection != null){
803
                collection.setSuperCollection(superCollection);
804
            }
805
            collectionMap.put(key, collection);
806
            getCollectionService().save(collection);
807
        }
808

    
809
        return collection;
810
    }
811

    
812

    
813
    /**
814
     * @param record
815
     * @param originalKey
816
     * @param doUnescapeHtmlEntities
817
     * @return
818
     */
819
    private String getValue(HashMap<String, String> record, String originalKey, boolean doUnescapeHtmlEntities) {
820
        String value = record.get(originalKey);
821

    
822
        value = fixCharacters(value);
823

    
824
        if (! StringUtils.isBlank(value)) {
825
        	if (logger.isDebugEnabled()) {
826
        	    logger.debug(originalKey + ": " + value);
827
        	}
828
        	value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
829
            if(doUnescapeHtmlEntities){
830
                value = StringEscapeUtils.unescapeHtml(value);
831
            }
832
        	return value.trim();
833
        }else{
834
        	return null;
835
        }
836
    }
837

    
838
    /**
839
     * Fixes broken characters.
840
     * For details see
841
     * http://dev.e-taxonomy.eu/redmine/issues/6035
842
     *
843
     * @param value
844
     * @return
845
     */
846
    private String fixCharacters(String value) {
847

    
848
        value = StringUtils.replace(value, "s$K", "š");
849
        value = StringUtils.replace(value, "n$K", "ň");
850
        value = StringUtils.replace(value, "e$K", "ě");
851
        value = StringUtils.replace(value, "r$K", "ř");
852
        value = StringUtils.replace(value, "c$K", "č");
853
        value = StringUtils.replace(value, "z$K", "ž");
854
        value = StringUtils.replace(value, "S>U$K", "Š");
855
        value = StringUtils.replace(value, "C>U$K", "Č");
856
        value = StringUtils.replace(value, "R>U$K", "Ř");
857
        value = StringUtils.replace(value, "Z>U$K", "Ž");
858
        value = StringUtils.replace(value, "g$K", "ǧ");
859
        value = StringUtils.replace(value, "s$A", "ś");
860
        value = StringUtils.replace(value, "n$A", "ń");
861
        value = StringUtils.replace(value, "c$A", "ć");
862
        value = StringUtils.replace(value, "e$E", "ę");
863
        value = StringUtils.replace(value, "o$H", "õ");
864
        value = StringUtils.replace(value, "s$C", "ş");
865
        value = StringUtils.replace(value, "t$C", "ț");
866
        value = StringUtils.replace(value, "S>U$C", "Ş");
867
        value = StringUtils.replace(value, "a$O", "å");
868
        value = StringUtils.replace(value, "A>U$O", "Å");
869
        value = StringUtils.replace(value, "u$O", "ů");
870
        value = StringUtils.replace(value, "g$B", "ğ");
871
        value = StringUtils.replace(value, "g$B", "ĕ");
872
        value = StringUtils.replace(value, "a$B", "ă");
873
        value = StringUtils.replace(value, "l$/", "ł");
874
        value = StringUtils.replace(value, ">i", "ı");
875
        value = StringUtils.replace(value, "i$U", "ï");
876
        // Special-cases
877
        value = StringUtils.replace(value, "&yacute", "ý");
878
        value = StringUtils.replace(value, ">L", "Ł"); // corrected rule
879
        value = StringUtils.replace(value, "E>U$D", "З");
880
        value = StringUtils.replace(value, "S>U$E", "Ş");
881
        value = StringUtils.replace(value, "s$E", "ş");
882

    
883
        value = StringUtils.replace(value, "c$k", "č");
884
        value = StringUtils.replace(value, " U$K", " Š");
885

    
886
        return value;
887
    }
888

    
889

    
890
    /**
891
	 *  Stores taxa records in DB
892
	 */
893
	@Override
894
    protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
895

    
896
        String lineNumber = "L#" + state.getCurrentLine() + ": ";
897
        logger.setLevel(Level.DEBUG);
898
        HashMap<String, String> record = state.getOriginalRecord();
899
        logger.debug(lineNumber + record.toString());
900

    
901
        Set<String> keys = record.keySet();
902
        for (String key: keys) {
903
            if (! expectedKeys.contains(key)){
904
                logger.warn(lineNumber + "Unexpected Key: " + key);
905
            }
906
        }
907

    
908
        String reg_id = record.get(REGISTRATIONNO_PK);
909

    
910
        //higherTaxon
911
        String higherTaxaString = record.get(HIGHERTAXON);
912
        boolean isFossil = false;
913
        if(higherTaxaString.startsWith("FOSSIL ")){
914
            higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
915
            isFossil = true;
916
        }
917
        TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
918

    
919
       //Taxon
920
        Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
921
        if (taxon == null){
922
            logger.warn(lineNumber + "taxon could not be created and is null");
923
            return;
924
        }
925
        ((IAPTImportState)state).setCurrentTaxon(taxon);
926

    
927

    
928
		return;
929
    }
930

    
931
    private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
932
        String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
933
        TaxonNode higherTaxonNode = null;
934

    
935
        ITaxonTreeNode rootNode = getClassificationRootNode(state);
936
        for (String htn :  higherTaxaNames) {
937
            htn = StringUtils.capitalize(htn.trim());
938
            Taxon higherTaxon = state.getHigherTaxon(htn);
939
            if (higherTaxon != null){
940
                higherTaxonNode = higherTaxon.getTaxonNodes().iterator().next();
941
            }else{
942
                BotanicalName name = makeHigherTaxonName(state, htn);
943
                Reference sec = state.getSecReference();
944
                higherTaxon = Taxon.NewInstance(name, sec);
945
                getTaxonService().save(higherTaxon);
946
                higherTaxonNode = rootNode.addChildTaxon(higherTaxon, sec, null);
947
                state.putHigherTaxon(htn, higherTaxon);
948
                getClassificationService().saveTreeNode(higherTaxonNode);
949
            }
950
            rootNode = higherTaxonNode;
951
        }
952
        return higherTaxonNode;
953
    }
954

    
955
    private BotanicalName makeHigherTaxonName(IAPTImportState state, String name) {
956

    
957
        Rank rank = guessRank(name);
958

    
959
        BotanicalName taxonName = BotanicalName.NewInstance(rank);
960
        taxonName.addSource(makeOriginalSource(state));
961
        taxonName.setGenusOrUninomial(StringUtils.capitalize(name));
962
        return taxonName;
963
    }
964

    
965
    private Rank guessRank(String name) {
966

    
967
        // normalize
968
        name = name.replaceAll("\\(.*\\)", "").trim();
969

    
970
        if(name.matches("^Plantae$|^Fungi$")){
971
           return Rank.KINGDOM();
972
        } else if(name.matches("^Incertae sedis$|^No group assigned$")){
973
           return rankFamilyIncertisSedis();
974
        } else if(name.matches(".*phyta$|.*mycota$")){
975
           return Rank.SECTION_BOTANY();
976
        } else if(name.matches(".*phytina$|.*mycotina$")){
977
           return Rank.SUBSECTION_BOTANY();
978
        } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
979
            return rankUnrankedSupraGeneric();
980
        } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
981
           return Rank.CLASS();
982
        } else if(name.matches(".*idae$|.*phycidae$|.*mycetidae$")){
983
           return Rank.SUBCLASS();
984
        } else if(name.matches(".*ales$")){
985
           return Rank.ORDER();
986
        } else if(name.matches(".*ineae$")){
987
           return Rank.SUBORDER();
988
        } else if(name.matches(".*aceae$")){
989
            return Rank.FAMILY();
990
        } else if(name.matches(".*oideae$")){
991
           return Rank.SUBFAMILY();
992
        } else
993
        //    if(name.matches(".*eae$")){
994
        //    return Rank.TRIBE();
995
        // } else
996
            if(name.matches(".*inae$")){
997
           return Rank.SUBTRIBE();
998
        } else if(name.matches(".*ae$")){
999
           return Rank.FAMILY();
1000
        }
1001
        return Rank.UNKNOWN_RANK();
1002
    }
1003

    
1004
    private Rank rankUnrankedSupraGeneric() {
1005

    
1006
        if(rankUnrankedSupraGeneric == null){
1007
            rankUnrankedSupraGeneric = Rank.NewInstance(RankClass.Suprageneric, "Unranked supra generic", " ", " ");
1008
            getTermService().save(rankUnrankedSupraGeneric);
1009
        }
1010
        return rankUnrankedSupraGeneric;
1011
    }
1012

    
1013
    private Rank rankFamilyIncertisSedis() {
1014

    
1015
        if(familyIncertisSedis == null){
1016
            familyIncertisSedis = Rank.NewInstance(RankClass.Suprageneric, "Family incertis sedis", " ", " ");
1017
            getTermService().save(familyIncertisSedis);
1018
        }
1019
        return familyIncertisSedis;
1020
    }
1021

    
1022
    private AnnotationType annotationTypeCaveats(){
1023
        if(annotationTypeCaveats == null){
1024
            annotationTypeCaveats = AnnotationType.NewInstance("Caveats", "Caveats", "");
1025
            getTermService().save(annotationTypeCaveats);
1026
        }
1027
        return annotationTypeCaveats;
1028
    }
1029

    
1030

    
1031
    /**
1032
     * @param state
1033
     * @return
1034
     */
1035
    private IdentifiableSource makeOriginalSource(IAPTImportState state) {
1036
        return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1037
    }
1038

    
1039

    
1040
    private Reference makeReference(IAPTImportState state, UUID uuidRef) {
1041
        Reference ref = state.getReference(uuidRef);
1042
        if (ref == null){
1043
            ref = getReferenceService().find(uuidRef);
1044
            state.putReference(uuidRef, ref);
1045
        }
1046
        return ref;
1047
    }
1048

    
1049
    private MarkerType markerTypeFossil(){
1050
        if(this.markerTypeFossil == null){
1051
            markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
1052
            getTermService().save(this.markerTypeFossil);
1053
        }
1054
        return markerTypeFossil;
1055
    }
1056

    
1057
    private String csvReportLine(String regId, String message, String ... fields){
1058
        StringBuilder out = new StringBuilder("regID#");
1059
        out.append(regId).append(",\"").append(message).append('"');
1060

    
1061
        for(String f : fields){
1062
            out.append(",\"").append(f).append('"');
1063
        }
1064
        return out.toString();
1065
    }
1066

    
1067

    
1068
}
(1-1/4)