app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java

   1 /**
   2  * Copyright (C) 2007 EDIT
   3  * European Distributed Institute of Taxonomy
   4  * http://www.e-taxonomy.eu
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version 1.1
   7  * See LICENSE.TXT at the top of this package for the full license terms.
   8  */
   9
  10 package eu.etaxonomy.cdm.io.iapt;
  11
  12 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
  13 import eu.etaxonomy.cdm.common.CdmUtils;
  14 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
  15 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
  16 import eu.etaxonomy.cdm.model.agent.Institution;
  17 import eu.etaxonomy.cdm.model.agent.Person;
  18 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
  19 import eu.etaxonomy.cdm.model.common.*;
  20 import eu.etaxonomy.cdm.model.name.*;
  21 import eu.etaxonomy.cdm.model.occurrence.*;
  22 import eu.etaxonomy.cdm.model.occurrence.Collection;
  23 import eu.etaxonomy.cdm.model.reference.Reference;
  24 import eu.etaxonomy.cdm.model.reference.ReferenceType;
  25 import eu.etaxonomy.cdm.model.taxon.*;
  26 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
  27 import org.apache.commons.lang.ArrayUtils;
  28 import org.apache.commons.lang.StringEscapeUtils;
  29 import org.apache.commons.lang.StringUtils;
  30 import org.apache.log4j.Level;
  31 import org.apache.log4j.Logger;
  32 import org.joda.time.DateTimeFieldType;
  33 import org.joda.time.Partial;
  34 import org.joda.time.format.DateTimeFormat;
  35 import org.joda.time.format.DateTimeFormatter;
  36 import org.springframework.stereotype.Component;
  37
  38 import java.util.*;
  39 import java.util.regex.Matcher;
  40 import java.util.regex.Pattern;
  41
  42 /**
  43  * @author a.mueller
  44  * @created 05.01.2016
  45  */
  46
  47 @Component("iAPTExcelImport")
  48 public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
  49     private static final long serialVersionUID = -747486709409732371L;
  50     private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
  51     public static final String ANNOTATION_MARKER_STRING = "[*]";
  52
  53
  54     private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
  55
  56     private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();
  57
  58     private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
  59     private final static String HIGHERTAXON= "HigherTaxon";
  60     private final static String FULLNAME= "FullName";
  61     private final static String AUTHORSSPELLING= "AuthorsSpelling";
  62     private final static String LITSTRING= "LitString";
  63     private final static String REGISTRATION= "Registration";
  64     private final static String TYPE= "Type";
  65     private final static String CAVEATS= "Caveats";
  66     private final static String FULLBASIONYM= "FullBasionym";
  67     private final static String FULLSYNSUBST= "FullSynSubst";
  68     private final static String NOTESTXT= "NotesTxt";
  69     private final static String REGDATE= "RegDate";
  70     private final static String NAMESTRING= "NameString";
  71     private final static String BASIONYMSTRING= "BasionymString";
  72     private final static String SYNSUBSTSTR= "SynSubstStr";
  73     private final static String AUTHORSTRING= "AuthorString";
  74
  75     private  static List<String> expectedKeys= Arrays.asList(new String[]{
  76             REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
  77
  78     private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)\\.?$");
  79     private static final Pattern[] datePatterns = new Pattern[]{
  80             // NOTE:
  81             // The order of the patterns is extremely important!!!
  82             //
  83             // all patterns cover the years 1700 - 1999
  84             Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
  85             Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
  86             Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
  87             Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
  88             Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
  89             Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
  90             Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
  91             Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),//  partial date like 1999-04
  92             Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
  93             Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
  94         };
  95     private static final Pattern typeSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
  96
  97     private static final Pattern collectorPattern =  Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$");
  98     private static final Pattern collectionDataPattern =  Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
  99     private static final Pattern collectorsNumber =  Pattern.compile("^([nN]o\\.\\s.*)$");
 100
 101     // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
 102     private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");
 103
 104     private static final Pattern[] specimenTypePatterns = new Pattern[]{
 105             Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
 106             Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
 107             Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
 108             Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
 109     };
 110
 111     private static Map<String, Integer> monthFromNameMap = new HashMap<>();
 112
 113     static {
 114         String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
 115         String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
 116         String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
 117         String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
 118         String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
 119         String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
 120         String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
 121         String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
 122         String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
 123         String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
 124
 125         String[][] perLang =  new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};
 126
 127         for (String[] months: perLang) {
 128             for(int m = 1; m < 13; m++){
 129                 monthFromNameMap.put(months[m - 1].toLowerCase(), m);
 130             }
 131         }
 132
 133         // special cases
 134         monthFromNameMap.put("mar", 3);
 135         monthFromNameMap.put("dec", 12);
 136         monthFromNameMap.put("Februari", 2);
 137     }
 138
 139
    /** Formats a date as four digit year, used when rebuilding the nomenclatural reference title. */
    DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");

    // Collections by String key; not accessed in the methods visible here,
    // presumably the cache behind getCollection(..) - TODO confirm
    private Map<String, Collection> collectionMap = new HashMap<>();
 143
 144
 145     enum TypesName {
 146         fieldUnit, holotype, isotype;
 147
 148         public SpecimenTypeDesignationStatus status(){
 149             switch (this) {
 150                 case holotype:
 151                     return SpecimenTypeDesignationStatus.HOLOTYPE();
 152                 case isotype:
 153                     return SpecimenTypeDesignationStatus.ISOTYPE();
 154                 default:
 155                     return null;
 156             }
 157         }
 158     }
 159
 160     private MarkerType markerTypeFossil = null;
 161     private Rank rankUnrankedSupraGeneric = null;
 162     private Rank familyIncertisSedis = null;
 163     private AnnotationType annotationTypeCaveats = null;
 164
 165     private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
 166                             TaxonNode higherTaxonNode, boolean isFossil) {
 167
 168         String regNumber = getValue(record, REGISTRATIONNO_PK, false);
 169         String regStr = getValue(record, REGISTRATION, true);
 170         String titleCacheStr = getValue(record, FULLNAME, true);
 171         String nameStr = getValue(record, NAMESTRING, true);
 172         String authorStr = getValue(record, AUTHORSTRING, true);
 173         String nomRefStr = getValue(record, LITSTRING, true);
 174         String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
 175         String notesTxt = getValue(record, NOTESTXT, true);
 176         String caveats = getValue(record, CAVEATS, true);
 177         String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
 178         String fullBasionymStr = getValue(record, FULLBASIONYM, true);
 179         String basionymNameStr = getValue(record, FULLBASIONYM, true);
 180         String synSubstStr = getValue(record, SYNSUBSTSTR, true);
 181         String typeStr = getValue(record, TYPE, true);
 182
 183
 184         String nomRefTitle = null;
 185         String nomRefDetail;
 186         String nomRefPupDate = null;
 187         String nomRefIssue = null;
 188         Partial pupDate = null;
 189
 190         // preprocess nomRef: separate citation, reference detail, publishing date
 191         if(!StringUtils.isEmpty(nomRefStr)){
 192             nomRefStr = nomRefStr.trim();
 193             Matcher m = nomRefTokenizeP.matcher(nomRefStr);
 194             if(m.matches()){
 195                 nomRefTitle = m.group("title");
 196                 nomRefDetail = m.group("detail");
 197                 nomRefPupDate = m.group("date").trim();
 198                 nomRefIssue = m.group("issue");
 199
 200                 pupDate = parseDate(regNumber, nomRefPupDate);
 201                 if (pupDate != null) {
 202                     nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
 203                 } else {
 204                     logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
 205                 }
 206             } else {
 207                 nomRefTitle = nomRefStr;
 208             }
 209         }
 210
 211         BotanicalName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);
 212
 213         // always add the original strings of parsed data as annotation
 214         taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
 215                         "\n -  '" + LITSTRING + "': "+ nomRefStr +
 216                         "\n -  '" + TYPE + "': " + typeStr +
 217                         "\n -  '" + REGISTRATION  + "': " + regStr
 218                 , AnnotationType.TECHNICAL(), Language.DEFAULT()));
 219
 220         if(pupDate != null) {
 221             taxonName.getNomenclaturalReference().setDatePublished(TimePeriod.NewInstance(pupDate));
 222         }
 223         if(nomRefIssue != null) {
 224             taxonName.getNomenclaturalReference().setType(ReferenceType.Book);
 225             ((Reference)taxonName.getNomenclaturalReference()).setVolume(nomRefIssue);
 226         }
 227
 228         if(!StringUtils.isEmpty(notesTxt)){
 229             notesTxt = notesTxt.replace("Notes: ", "").trim();
 230             taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
 231         }
 232         if(!StringUtils.isEmpty(caveats)){
 233             caveats = caveats.replace("Caveats: ", "").trim();
 234             taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
 235         }
 236
 237         getNameService().save(taxonName);
 238
 239         // Namerelations
 240         if(!StringUtils.isEmpty(authorsSpelling)){
 241             authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
 242
 243             String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
 244             String[] nameStrTokens = StringUtils.split(nameStr, " ");
 245
 246             ArrayUtils.reverse(authorSpellingTokens);
 247             ArrayUtils.reverse(nameStrTokens);
 248
 249             for (int i = 0; i < nameStrTokens.length; i++){
 250                 if(i < authorSpellingTokens.length){
 251                     nameStrTokens[i] = authorSpellingTokens[i];
 252                 }
 253             }
 254             ArrayUtils.reverse(nameStrTokens);
 255
 256             String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
 257             // build the fullnameString of the misspelled name
 258             misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
 259
 260             TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
 261             misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
 262             getNameService().save(misspelledName);
 263         }
 264
 265         // Replaced Synonyms
 266         if(!StringUtils.isEmpty(fullSynSubstStr)){
 267             fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
 268             BotanicalName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
 269             replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
 270             getNameService().save(replacedSynonymName);
 271         }
 272
 273         Reference sec = state.getConfig().getSecReference();
 274         Taxon taxon = Taxon.NewInstance(taxonName, sec);
 275
 276         // Basionym
 277         if(fullBasionymStr != null){
 278             fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
 279             BotanicalName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
 280             getNameService().save(basionym);
 281             taxonName.addBasionym(basionym);
 282
 283             Synonym syn = Synonym.NewInstance(basionym, sec);
 284             taxon.addSynonym(syn, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF());
 285             getTaxonService().save(syn);
 286         }
 287
 288         // Markers
 289         if(isFossil){
 290             taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
 291         }
 292
 293         // Types
 294         if(!StringUtils.isEmpty(typeStr)){
 295             makeTypeData(typeStr, taxonName, regNumber, state);
 296         }
 297
 298         getTaxonService().save(taxon);
 299         if(higherTaxonNode != null){
 300             higherTaxonNode.addChildTaxon(taxon, null, null);
 301             getTaxonNodeService().save(higherTaxonNode);
 302         }
 303
 304         return taxon;
 305     }
 306
 307     private void makeTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
 308
 309         Matcher m = typeSplitPattern.matcher(typeStr);
 310
 311         if(m.matches()){
 312             String fieldUnitStr = m.group(TypesName.fieldUnit.name());
 313             // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
 314             FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
 315             if(fieldUnit == null) {
 316                 // create a field unit with only a titleCache using the fieldUnitStr substring
 317                 logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
 318                 fieldUnit = FieldUnit.NewInstance();
 319                 fieldUnit.setTitleCache(fieldUnitStr, true);
 320                 getOccurrenceService().save(fieldUnit);
 321             }
 322             getOccurrenceService().save(fieldUnit);
 323
 324             // all others ..
 325             addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber);
 326             addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber);
 327
 328         } else {
 329             // create a field unit with only a titleCache using the full typeStr
 330             FieldUnit fieldUnit = FieldUnit.NewInstance();
 331             fieldUnit.setTitleCache(typeStr, true);
 332             getOccurrenceService().save(fieldUnit);
 333             logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
 334         }
 335         getNameService().save(taxonName);
 336     }
 337
 338     /**
 339      * Currently only parses the collector, fieldNumber and the collection date.
 340      *
 341      * @param fieldUnitStr
 342      * @param regNumber
 343      * @param state
 344      * @return null if the fieldUnitStr could not be parsed
 345      */
 346     private FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
 347
 348         FieldUnit fieldUnit = null;
 349
 350         Matcher m1 = collectorPattern.matcher(fieldUnitStr);
 351         if(m1.matches()){
 352
 353             String collectorData = m1.group(2); // like (leg. Metzeltin, 30. 9. 1996)
 354             String removal = m1.group(1);
 355             if(collectorData == null){
 356                 collectorData = m1.group(4); // like leg. Metzeltin, 30. 9. 1996
 357                 removal = m1.group(3);
 358             }
 359             if(collectorData == null){
 360                 return null;
 361             }
 362
 363             // the fieldUnitStr is parsable
 364             // remove all collectorData from the fieldUnitStr and use the rest as locality
 365             String locality = fieldUnitStr.replace(removal, "");
 366
 367             String collectorStr = null;
 368             String detailStr = null;
 369             Partial date = null;
 370             String fieldNumber = null;
 371
 372             Matcher m2 = collectionDataPattern.matcher(collectorData);
 373             if(m2.matches()){
 374                 collectorStr = m2.group("collector");
 375                 detailStr = m2.group("detail");
 376
 377                 // Try to make sense of the detailStr
 378                 if(detailStr != null){
 379                     detailStr = detailStr.trim();
 380                     // 1. try to parse as date
 381                     date = parseDate(regNumber, detailStr);
 382                     if(date == null){
 383                         // 2. try to parse as number
 384                         if(collectorsNumber.matcher(detailStr).matches()){
 385                             fieldNumber = detailStr;
 386                         }
 387                     }
 388                 }
 389                 if(date == null && fieldNumber == null){
 390                     // detailed parsing not possible, so need fo fallback
 391                     collectorStr = collectorData;
 392                 }
 393             }
 394
 395             if(collectorStr == null) {
 396                 collectorStr = collectorData;
 397             }
 398
 399             fieldUnit = FieldUnit.NewInstance();
 400             GatheringEvent ge = GatheringEvent.NewInstance();
 401             ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
 402
 403             TeamOrPersonBase agent =  state.getAgentBase(collectorStr);
 404             if(agent == null) {
 405                 agent = Person.NewTitledInstance(collectorStr);
 406                 getAgentService().save(agent);
 407                 state.putAgentBase(collectorStr, agent);
 408             }
 409             ge.setCollector(agent);
 410
 411             if(date != null){
 412                 ge.setGatheringDate(date);
 413             }
 414
 415             getEventBaseService().save(ge);
 416             fieldUnit.setGatheringEvent(ge);
 417
 418             if(fieldNumber != null) {
 419                 fieldUnit.setFieldNumber(fieldNumber);
 420             }
 421             getOccurrenceService().save(fieldUnit);
 422
 423         }
 424
 425         return fieldUnit;
 426     }
 427
 428     private Partial parseDate(String regNumber, String dateStr) {
 429
 430         Partial pupDate = null;
 431         boolean parseError = false;
 432
 433         String day = null;
 434         String month = null;
 435         String monthName = null;
 436         String year = null;
 437
 438         for(Pattern p : datePatterns){
 439             Matcher m2 = p.matcher(dateStr);
 440             if(m2.matches()){
 441                 try {
 442                     year = m2.group("year");
 443                 } catch (IllegalArgumentException e){
 444                     // named capture group not found
 445                 }
 446                 try {
 447                     month = m2.group("month");
 448                 } catch (IllegalArgumentException e){
 449                     // named capture group not found
 450                 }
 451
 452                 try {
 453                     monthName = m2.group("monthName");
 454                     month = monthFromName(monthName, regNumber);
 455                     if(month == null){
 456                         parseError = true;
 457                     }
 458                 } catch (IllegalArgumentException e){
 459                     // named capture group not found
 460                 }
 461                 try {
 462                     day = m2.group("day");
 463                 } catch (IllegalArgumentException e){
 464                     // named capture group not found
 465                 }
 466
 467                 if(year != null){
 468                     if (year.length() == 2) {
 469                         // it is an abbreviated year from the 19** years
 470                         year = "19" + year;
 471                     }
 472                     break;
 473                 } else {
 474                     parseError = true;
 475                 }
 476             }
 477         }
 478         if(year == null){
 479             parseError = true;
 480         }
 481         List<DateTimeFieldType> types = new ArrayList<>();
 482         List<Integer> values = new ArrayList<>();
 483         if(!parseError) {
 484             types.add(DateTimeFieldType.year());
 485             values.add(Integer.parseInt(year));
 486             if (month != null) {
 487                 types.add(DateTimeFieldType.monthOfYear());
 488                 values.add(Integer.parseInt(month));
 489             }
 490             if (day != null) {
 491                 types.add(DateTimeFieldType.dayOfMonth());
 492                 values.add(Integer.parseInt(day));
 493             }
 494             pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
 495         }
 496         return pupDate;
 497     }
 498
 499     private String monthFromName(String monthName, String regNumber) {
 500
 501         Integer month = monthFromNameMap.get(monthName.toLowerCase());
 502         if(month == null){
 503             logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
 504             return null;
 505         } else {
 506             return month.toString();
 507         }
 508     }
 509
 510
 511     private void addSpecimenTypes(BotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber){
 512
 513         if(StringUtils.isEmpty(typeStr)){
 514             return;
 515         }
 516         typeStr = typeStr.trim().replaceAll("\\.$", "");
 517
 518         Collection collection = null;
 519         DerivedUnit specimen = null;
 520
 521         List<DerivedUnit> specimens = new ArrayList<>();
 522         if(multiple){
 523             String[] tokens = typeStr.split("\\s?,\\s?");
 524             for (String t : tokens) {
 525                 // command to  list all complex parsabel types:
 526                 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
 527                 // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
 528
 529                 if(!t.isEmpty()){
 530                     // trying to parse the string
 531                     specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
 532                     if(specimen != null){
 533                         specimens.add(specimen);
 534                     } else {
 535                         // parsing was not successful make simple specimen
 536                         specimens.add(makeSpecimenType(fieldUnit, t));
 537                     }
 538                 }
 539             }
 540         } else {
 541             specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
 542             if(specimen != null) {
 543                 specimens.add(specimen);
 544                 // remember current collection
 545                 collection = specimen.getCollection();
 546             } else {
 547                 // parsing was not successful make simple specimen
 548                 specimens.add(makeSpecimenType(fieldUnit, typeStr));
 549             }
 550         }
 551
 552         for(DerivedUnit s : specimens){
 553             taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
 554        }
 555     }
 556
 557     private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache) {
 558         DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
 559         facade.setTitleCache(titleCache.trim(), true);
 560         specimen = facade.innerDerivedUnit();
 561         return specimen;
 562     }
 563
 564     /**
 565      *
 566      * @param fieldUnit
 567      * @param typeName
 568      * @param collection
 569      * @param text
 570      * @param regNumber
 571      * @return
 572      */
 573     private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
 574
 575         DerivedUnit specimen = null;
 576
 577         String collectionCode = null;
 578         String subCollectionStr = null;
 579         String instituteStr = null;
 580         String accessionNumber = null;
 581
 582         boolean unusualAccessionNumber = false;
 583
 584         text = text.trim();
 585
 586         // 1.  For Isotypes often the accession number is noted alone if the
 587         //     preceeding entry has a collection code.
 588         if(typeName .equals(TypesName.isotype) && collection != null){
 589             Matcher m = accessionNumberOnlyPattern.matcher(text);
 590             if(m.matches()){
 591                 try {
 592                     accessionNumber = m.group("accNumber");
 593                     specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
 594                 } catch (IllegalArgumentException e){
 595                     // match group acc_number not found
 596                 }
 597             }
 598         }
 599
 600         //2. try it the 'normal' way
 601         if(specimen == null) {
 602             for (Pattern p : specimenTypePatterns) {
 603                 Matcher m = p.matcher(text);
 604                 if (m.matches()) {
 605                     // collection code is mandatory
 606                     try {
 607                         collectionCode = m.group("colCode");
 608                     } catch (IllegalArgumentException e){
 609                         // match group colCode not found
 610                     }
 611                     try {
 612                         subCollectionStr = m.group("subCollection");
 613                     } catch (IllegalArgumentException e){
 614                         // match group subCollection not found
 615                     }
 616                     try {
 617                         instituteStr = m.group("institute");
 618                     } catch (IllegalArgumentException e){
 619                         // match group col_name not found
 620                     }
 621                     try {
 622                         accessionNumber = m.group("accNumber");
 623
 624                         // try to improve the accessionNumber
 625                         if(accessionNumber!= null) {
 626                             accessionNumber = accessionNumber.trim();
 627                             Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
 628                             String betterAccessionNumber = null;
 629                             if (m2.matches()) {
 630                                 try {
 631                                     betterAccessionNumber = m.group("accNumber");
 632                                 } catch (IllegalArgumentException e) {
 633                                     // match group acc_number not found
 634                                 }
 635                             }
 636                             if (betterAccessionNumber != null) {
 637                                 accessionNumber = betterAccessionNumber;
 638                             } else {
 639                                 unusualAccessionNumber = true;
 640                             }
 641                         }
 642
 643                     } catch (IllegalArgumentException e){
 644                         // match group acc_number not found
 645                     }
 646
 647                     if(collectionCode == null && instituteStr == null){
 648                         logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
 649                         continue;
 650                     }
 651                     collection = getCollection(collectionCode, instituteStr, subCollectionStr);
 652                     specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
 653                     break;
 654                 }
 655             }
 656         }
 657         if(specimen == null) {
 658             logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
 659         }
 660         if(unusualAccessionNumber){
 661             logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
 662         }
 663         return specimen;
 664     }
 665
 666     private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
 667
 668         DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
 669         facade.setCollection(collection);
 670         if(accessionNumber != null){
 671             facade.setAccessionNumber(accessionNumber);
 672         }
 673         return facade.innerDerivedUnit();
 674     }
 675
 676     private BotanicalName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
 677                                             String authorStr, String nomRefTitle) {
 678
 679         BotanicalName taxonName;// cache field for the taxonName.titleCache
 680         String taxonNameTitleCache = null;
 681         Map<String, AnnotationType> nameAnnotations = new HashMap<>();
 682
 683         // TitleCache preprocessing
 684         if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
 685             nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
 686             titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
 687             if(authorStr != null) {
 688                 authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
 689             }
 690         }
 691
 692         // parse the full taxon name
 693         if(!StringUtils.isEmpty(nomRefTitle)){
 694             String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
 695             String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
 696             logger.debug(":::::" + taxonFullNameStr);
 697             taxonName = (BotanicalName) nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
 698         } else {
 699             taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
 700         }
 701
 702         taxonNameTitleCache = taxonName.getTitleCache().trim();
 703         if (taxonName.isProtectedTitleCache()) {
 704             logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
 705         } else {
 706
 707             boolean doRestoreTitleCacheStr = false;
 708
 709             // Check if titleCache and nameCache are plausible
 710             String titleCacheCompareStr = titleCacheStr;
 711             String nameCache = taxonName.getNameCache();
 712             String nameCompareStr = nameStr;
 713             if(taxonName.isBinomHybrid()){
 714                 titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
 715                 nameCompareStr = nameCompareStr.replace(" x ", " ×");
 716             }
 717             if(taxonName.isMonomHybrid()){
 718                 titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
 719                 nameCompareStr = nameCompareStr.replace("^X ", "× ");
 720             }
 721             if(authorStr != null && authorStr.contains(" et ")){
 722                 titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
 723             }
 724             if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
 725                 logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
 726                 doRestoreTitleCacheStr = true;
 727             }
 728             if (!nameCache.trim().equals(nameCompareStr)) {
 729                 logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
 730             }
 731
 732             //  Author
 733             //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
 734             //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
 735             //    logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
 736             //    doRestoreTitleCacheStr = true;
 737             //}
 738
 739             if(doRestoreTitleCacheStr){
 740                 taxonName.setTitleCache(titleCacheStr, true);
 741             }
 742
 743             // deduplicate
 744             replaceAuthorNamesAndNomRef(state, taxonName);
 745         }
 746
 747         // Annotations
 748         if(!nameAnnotations.isEmpty()){
 749             for(String text : nameAnnotations.keySet()){
 750                 taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
 751             }
 752             getNameService().save(taxonName);
 753         }
 754         return taxonName;
 755     }
 756
 757     /**
 758      * @param state
 759      * @return
 760      */
 761     private TaxonNode getClassificationRootNode(IAPTImportState state) {
 762
 763      //   Classification classification = state.getClassification();
 764      //   if (classification == null){
 765      //       IAPTImportConfigurator config = state.getConfig();
 766      //       classification = Classification.NewInstance(state.getConfig().getClassificationName());
 767      //       classification.setUuid(config.getClassificationUuid());
 768      //       classification.setReference(config.getSecReference());
 769      //       classification = getClassificationService().find(state.getConfig().getClassificationUuid());
 770      //   }
 771         TaxonNode rootNode = state.getRootNode();
 772         if (rootNode == null){
 773             rootNode = getTaxonNodeService().find(ROOT_UUID);
 774         }
 775         if (rootNode == null){
 776             Classification classification = state.getClassification();
 777             if (classification == null){
 778                 Reference sec = state.getSecReference();
 779                 String classificationName = state.getConfig().getClassificationName();
 780                 Language language = Language.DEFAULT();
 781                 classification = Classification.NewInstance(classificationName, sec, language);
 782                 state.setClassification(classification);
 783                 classification.setUuid(state.getConfig().getClassificationUuid());
 784                 classification.getRootNode().setUuid(ROOT_UUID);
 785                 getClassificationService().save(classification);
 786             }
 787             rootNode = classification.getRootNode();
 788             state.setRootNode(rootNode);
 789         }
 790         return rootNode;
 791     }
 792
 793     private Collection getCollection(String collectionCode, String instituteStr, String subCollectionStr){
 794
 795         Collection superCollection = null;
 796         if(subCollectionStr != null){
 797             superCollection = getCollection(collectionCode, instituteStr, null);
 798             collectionCode = subCollectionStr;
 799             instituteStr = null;
 800         }
 801
 802         final String key = collectionCode + "-#i:" + StringUtils.defaultString(instituteStr);
 803
 804         Collection collection = collectionMap.get(key);
 805
 806         if(collection == null) {
 807             collection = Collection.NewInstance();
 808             collection.setCode(collectionCode);
 809             if(instituteStr != null){
 810                 collection.setInstitute(Institution.NewNamedInstance(instituteStr));
 811             }
 812             if(superCollection != null){
 813                 collection.setSuperCollection(superCollection);
 814             }
 815             collectionMap.put(key, collection);
 816             getCollectionService().save(collection);
 817         }
 818
 819         return collection;
 820     }
 821
 822
 823     /**
 824      * @param record
 825      * @param originalKey
 826      * @param doUnescapeHtmlEntities
 827      * @return
 828      */
 829     private String getValue(HashMap<String, String> record, String originalKey, boolean doUnescapeHtmlEntities) {
 830         String value = record.get(originalKey);
 831
 832         value = fixCharacters(value);
 833
 834         if (! StringUtils.isBlank(value)) {
 835                 if (logger.isDebugEnabled()) {
 836                     logger.debug(originalKey + ": " + value);
 837                 }
 838                 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
 839             if(doUnescapeHtmlEntities){
 840                 value = StringEscapeUtils.unescapeHtml(value);
 841             }
 842                 return value.trim();
 843         }else{
 844                 return null;
 845         }
 846     }
 847
 848     /**
 849      * Fixes broken characters.
 850      * For details see
 851      * http://dev.e-taxonomy.eu/redmine/issues/6035
 852      *
 853      * @param value
 854      * @return
 855      */
 856     private String fixCharacters(String value) {
 857
 858         value = StringUtils.replace(value, "s$K", "š");
 859         value = StringUtils.replace(value, "n$K", "ň");
 860         value = StringUtils.replace(value, "e$K", "ě");
 861         value = StringUtils.replace(value, "r$K", "ř");
 862         value = StringUtils.replace(value, "c$K", "č");
 863         value = StringUtils.replace(value, "z$K", "ž");
 864         value = StringUtils.replace(value, "S>U$K", "Š");
 865         value = StringUtils.replace(value, "C>U$K", "Č");
 866         value = StringUtils.replace(value, "R>U$K", "Ř");
 867         value = StringUtils.replace(value, "Z>U$K", "Ž");
 868         value = StringUtils.replace(value, "g$K", "ǧ");
 869         value = StringUtils.replace(value, "s$A", "ś");
 870         value = StringUtils.replace(value, "n$A", "ń");
 871         value = StringUtils.replace(value, "c$A", "ć");
 872         value = StringUtils.replace(value, "e$E", "ę");
 873         value = StringUtils.replace(value, "o$H", "õ");
 874         value = StringUtils.replace(value, "s$C", "ş");
 875         value = StringUtils.replace(value, "t$C", "ț");
 876         value = StringUtils.replace(value, "S>U$C", "Ş");
 877         value = StringUtils.replace(value, "a$O", "å");
 878         value = StringUtils.replace(value, "A>U$O", "Å");
 879         value = StringUtils.replace(value, "u$O", "ů");
 880         value = StringUtils.replace(value, "g$B", "ğ");
 881         value = StringUtils.replace(value, "g$B", "ĕ");
 882         value = StringUtils.replace(value, "a$B", "ă");
 883         value = StringUtils.replace(value, "l$/", "ł");
 884         value = StringUtils.replace(value, ">i", "ı");
 885         value = StringUtils.replace(value, "i$U", "ï");
 886         // Special-cases
 887         value = StringUtils.replace(value, "&yacute", "ý");
 888         value = StringUtils.replace(value, ">L", "Ł"); // corrected rule
 889         value = StringUtils.replace(value, "E>U$D", "З");
 890         value = StringUtils.replace(value, "S>U$E", "Ş");
 891         value = StringUtils.replace(value, "s$E", "ş");
 892
 893         value = StringUtils.replace(value, "c$k", "č");
 894         value = StringUtils.replace(value, " U$K", " Š");
 895
 896         return value;
 897     }
 898
 899
 900     /**
 901          *  Stores taxa records in DB
 902          */
 903         @Override
 904     protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
 905
 906         String lineNumber = "L#" + state.getCurrentLine() + ": ";
 907         logger.setLevel(Level.DEBUG);
 908         HashMap<String, String> record = state.getOriginalRecord();
 909         logger.debug(lineNumber + record.toString());
 910
 911         Set<String> keys = record.keySet();
 912         for (String key: keys) {
 913             if (! expectedKeys.contains(key)){
 914                 logger.warn(lineNumber + "Unexpected Key: " + key);
 915             }
 916         }
 917
 918         String reg_id = record.get(REGISTRATIONNO_PK);
 919
 920         //higherTaxon
 921         String higherTaxaString = record.get(HIGHERTAXON);
 922         boolean isFossil = false;
 923         if(higherTaxaString.startsWith("FOSSIL ")){
 924             higherTaxaString = higherTaxaString.replace("FOSSIL ", "");
 925             isFossil = true;
 926         }
 927         TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state);
 928
 929        //Taxon
 930         Taxon taxon = makeTaxon(record, state, higherTaxon, isFossil);
 931         if (taxon == null){
 932             logger.warn(lineNumber + "taxon could not be created and is null");
 933             return;
 934         }
 935         ((IAPTImportState)state).setCurrentTaxon(taxon);
 936
 937
 938                 return;
 939     }
 940
 941     private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
 942         String[] higherTaxaNames = higherTaxaString.toLowerCase().replaceAll("[\\[\\]]", "").split(":");
 943         TaxonNode higherTaxonNode = null;
 944
 945         ITaxonTreeNode rootNode = getClassificationRootNode(state);
 946         for (String htn :  higherTaxaNames) {
 947             htn = StringUtils.capitalize(htn.trim());
 948             Taxon higherTaxon = state.getHigherTaxon(htn);
 949             if (higherTaxon != null){
 950                 higherTaxonNode = higherTaxon.getTaxonNodes().iterator().next();
 951             }else{
 952                 BotanicalName name = makeHigherTaxonName(state, htn);
 953                 Reference sec = state.getSecReference();
 954                 higherTaxon = Taxon.NewInstance(name, sec);
 955                 getTaxonService().save(higherTaxon);
 956                 higherTaxonNode = rootNode.addChildTaxon(higherTaxon, sec, null);
 957                 state.putHigherTaxon(htn, higherTaxon);
 958                 getClassificationService().saveTreeNode(higherTaxonNode);
 959             }
 960             rootNode = higherTaxonNode;
 961         }
 962         return higherTaxonNode;
 963     }
 964
 965     private BotanicalName makeHigherTaxonName(IAPTImportState state, String name) {
 966
 967         Rank rank = guessRank(name);
 968
 969         BotanicalName taxonName = BotanicalName.NewInstance(rank);
 970         taxonName.addSource(makeOriginalSource(state));
 971         taxonName.setGenusOrUninomial(StringUtils.capitalize(name));
 972         return taxonName;
 973     }
 974
 975     private Rank guessRank(String name) {
 976
 977         // normalize
 978         name = name.replaceAll("\\(.*\\)", "").trim();
 979
 980         if(name.matches("^Plantae$|^Fungi$")){
 981            return Rank.KINGDOM();
 982         } else if(name.matches("^Incertae sedis$|^No group assigned$")){
 983            return rankFamilyIncertisSedis();
 984         } else if(name.matches(".*phyta$|.*mycota$")){
 985            return Rank.SECTION_BOTANY();
 986         } else if(name.matches(".*phytina$|.*mycotina$")){
 987            return Rank.SUBSECTION_BOTANY();
 988         } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
 989             return rankUnrankedSupraGeneric();
 990         } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){
 991            return Rank.CLASS();
 992         } else if(name.matches(".*idae$|.*phycidae$|.*mycetidae$")){
 993            return Rank.SUBCLASS();
 994         } else if(name.matches(".*ales$")){
 995            return Rank.ORDER();
 996         } else if(name.matches(".*ineae$")){
 997            return Rank.SUBORDER();
 998         } else if(name.matches(".*aceae$")){
 999             return Rank.FAMILY();
1000         } else if(name.matches(".*oideae$")){
1001            return Rank.SUBFAMILY();
1002         } else
1003         //    if(name.matches(".*eae$")){
1004         //    return Rank.TRIBE();
1005         // } else
1006             if(name.matches(".*inae$")){
1007            return Rank.SUBTRIBE();
1008         } else if(name.matches(".*ae$")){
1009            return Rank.FAMILY();
1010         }
1011         return Rank.UNKNOWN_RANK();
1012     }
1013
1014     private Rank rankUnrankedSupraGeneric() {
1015
1016         if(rankUnrankedSupraGeneric == null){
1017             rankUnrankedSupraGeneric = Rank.NewInstance(RankClass.Suprageneric, "Unranked supra generic", " ", " ");
1018             getTermService().save(rankUnrankedSupraGeneric);
1019         }
1020         return rankUnrankedSupraGeneric;
1021     }
1022
1023     private Rank rankFamilyIncertisSedis() {
1024
1025         if(familyIncertisSedis == null){
1026             familyIncertisSedis = Rank.NewInstance(RankClass.Suprageneric, "Family incertis sedis", " ", " ");
1027             getTermService().save(familyIncertisSedis);
1028         }
1029         return familyIncertisSedis;
1030     }
1031
1032     private AnnotationType annotationTypeCaveats(){
1033         if(annotationTypeCaveats == null){
1034             annotationTypeCaveats = AnnotationType.NewInstance("Caveats", "Caveats", "");
1035             getTermService().save(annotationTypeCaveats);
1036         }
1037         return annotationTypeCaveats;
1038     }
1039
1040
1041     /**
1042      * @param state
1043      * @return
1044      */
1045     private IdentifiableSource makeOriginalSource(IAPTImportState state) {
1046         return IdentifiableSource.NewDataImportInstance("line: " + state.getCurrentLine(), null, state.getConfig().getSourceReference());
1047     }
1048
1049
1050     private Reference makeReference(IAPTImportState state, UUID uuidRef) {
1051         Reference ref = state.getReference(uuidRef);
1052         if (ref == null){
1053             ref = getReferenceService().find(uuidRef);
1054             state.putReference(uuidRef, ref);
1055         }
1056         return ref;
1057     }
1058
1059     private MarkerType markerTypeFossil(){
1060         if(this.markerTypeFossil == null){
1061             markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null);
1062             getTermService().save(this.markerTypeFossil);
1063         }
1064         return markerTypeFossil;
1065     }
1066
1067     private String csvReportLine(String regId, String message, String ... fields){
1068         StringBuilder out = new StringBuilder("regID#");
1069         out.append(regId).append(",\"").append(message).append('"');
1070
1071         for(String f : fields){
1072             out.append(",\"").append(f).append('"');
1073         }
1074         return out.toString();
1075     }
1076
1077
1078 }