X-Git-Url: https://dev.e-taxonomy.eu/gitweb/cdmlib-apps.git/blobdiff_plain/1968849da853edb6c43dc012065d7181bd176684..5e52d4ec10f1bf3fa1ce10019c55cbf0aadfad80:/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java index a77978d5..3e32113a 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java +++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java @@ -10,6 +10,7 @@ package eu.etaxonomy.cdm.io.iapt; import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade; +import eu.etaxonomy.cdm.api.service.pager.Pager; import eu.etaxonomy.cdm.common.CdmUtils; import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport; import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState; @@ -21,8 +22,11 @@ import eu.etaxonomy.cdm.model.name.*; import eu.etaxonomy.cdm.model.occurrence.*; import eu.etaxonomy.cdm.model.occurrence.Collection; import eu.etaxonomy.cdm.model.reference.Reference; +import eu.etaxonomy.cdm.model.reference.ReferenceFactory; +import eu.etaxonomy.cdm.model.reference.ReferenceType; import eu.etaxonomy.cdm.model.taxon.*; import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; +import eu.etaxonomy.cdm.strategy.parser.ParserProblem; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; @@ -74,7 +78,7 @@ public class IAPTExcelImport extends Simp private static List expectedKeys= Arrays.asList(new String[]{ REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING}); - private static final Pattern nomRefTokenizeP = Pattern.compile("^(.*):\\s([^\\.:]+)\\.(.*?)\\.?$"); + private static final Pattern nomRefTokenizeP = Pattern.compile("^(?.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$"); private static final Pattern[] datePatterns = new Pattern[]{ // NOTE: // The order of the patterns is extremely important!!! @@ -84,16 +88,20 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999 Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12 Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969 - Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969 - Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999 + Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969 + Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)?(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999 Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969 Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04 - Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969 + Pattern.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969 Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999 }; - private static final Pattern typeSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$"); + private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$"); - private static final Pattern collectorPattern = Pattern.compile(".*?\\(leg\\.\\s+([^\\)]*)\\)|.*?\\sleg\\.\\s+(.*?)\\.?$"); + private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$"); + private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]' + private static final Pattern typeNameSpecialSplitPattern = Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(<name>.*)"); + + private static final Pattern collectorPattern = Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$"); private static final Pattern collectionDataPattern = Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$"); private static final Pattern collectorsNumber = Pattern.compile("^([nN]o\\.\\s.*)$"); @@ -103,7 +111,8 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp private static final Pattern[] specimenTypePatterns = new Pattern[]{ Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222 Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78 - Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 + Pattern.compile("^(?<institute>Coll\\.\\s.*?)\\s+(?<accNumber>(Praep|slide).*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 + // Pattern.compile("^.*(?<accNumber>Praep.*)$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any }; @@ -161,11 +170,16 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp private Rank familyIncertisSedis = null; private AnnotationType annotationTypeCaveats = null; + private Reference bookVariedadesTradicionales = null; + + /** + * HACK for unit simple testing + */ + boolean _testMode = System.getProperty("TEST_MODE") != null; + private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state, TaxonNode higherTaxonNode, boolean isFossil) { - String line = state.getCurrentLine() + ": "; - String regNumber = getValue(record, REGISTRATIONNO_PK, false); String regStr = getValue(record, REGISTRATION, true); String titleCacheStr = getValue(record, FULLNAME, true); @@ -176,6 +190,8 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp String notesTxt = getValue(record, NOTESTXT, true); String caveats = getValue(record, CAVEATS, true); String fullSynSubstStr = getValue(record, FULLSYNSUBST, true); + String fullBasionymStr = getValue(record, FULLBASIONYM, true); + String basionymNameStr = getValue(record, FULLBASIONYM, true); String synSubstStr = getValue(record, SYNSUBSTSTR, true); String typeStr = getValue(record, TYPE, true); @@ -183,16 +199,37 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp String nomRefTitle = null; String nomRefDetail; String nomRefPupDate = null; + String nomRefIssue = null; Partial pupDate = null; + boolean restoreOriginalReference = false; + boolean nameIsValid = true; + // preprocess nomRef: separate citation, reference detail, publishing date if(!StringUtils.isEmpty(nomRefStr)){ nomRefStr = nomRefStr.trim(); + + // handle the special case which is hard to parse: + // + // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997. + if(nomRefStr.startsWith("Las variedades tradicionales de frutales ")){ + + if(bookVariedadesTradicionales == null){ + bookVariedadesTradicionales = ReferenceFactory.newBook(); + bookVariedadesTradicionales.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita"); + bookVariedadesTradicionales.setDatePublished(TimePeriod.NewInstance(1997)); + getReferenceService().save(bookVariedadesTradicionales); + } + nomRefStr = nomRefStr.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:"); + restoreOriginalReference = true; + } + Matcher m = nomRefTokenizeP.matcher(nomRefStr); if(m.matches()){ - nomRefTitle = m.group(1); - nomRefDetail = m.group(2); - nomRefPupDate = m.group(3).trim(); + nomRefTitle = m.group("title"); + nomRefDetail = m.group("detail"); + nomRefPupDate = m.group("date").trim(); + nomRefIssue = m.group("issue"); pupDate = parseDate(regNumber, nomRefPupDate); if (pupDate != null) { @@ -214,19 +251,35 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp "\n - '" + REGISTRATION + "': " + regStr , AnnotationType.TECHNICAL(), Language.DEFAULT())); + if(restoreOriginalReference){ + taxonName.setNomenclaturalReference(bookVariedadesTradicionales); + } if(pupDate != null) { taxonName.getNomenclaturalReference().setDatePublished(TimePeriod.NewInstance(pupDate)); } + if(nomRefIssue != null) { + ((Reference)taxonName.getNomenclaturalReference()).setVolume(nomRefIssue); + } + if(!StringUtils.isEmpty(notesTxt)){ notesTxt = notesTxt.replace("Notes: ", "").trim(); taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT())); + nameIsValid = false; + } if(!StringUtils.isEmpty(caveats)){ caveats = caveats.replace("Caveats: ", "").trim(); taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT())); + nameIsValid = false; + } + + if(nameIsValid){ + // Status is always considered valid if no notes and cavets are set + taxonName.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.VALID())); } - // + + getNameService().save(taxonName); // Namerelations if(!StringUtils.isEmpty(authorsSpelling)){ @@ -265,6 +318,19 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp Reference sec = state.getConfig().getSecReference(); Taxon taxon = Taxon.NewInstance(taxonName, sec); + // Basionym + if(fullBasionymStr != null){ + fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: " + basionymNameStr = basionymNameStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: " + BotanicalName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null); + getNameService().save(basionym); + taxonName.addBasionym(basionym); + + Synonym syn = Synonym.NewInstance(basionym, sec); + taxon.addSynonym(syn, SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF()); + getTaxonService().save(syn); + } + // Markers if(isFossil){ taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true)); @@ -272,22 +338,41 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp // Types if(!StringUtils.isEmpty(typeStr)){ - makeTypeData(typeStr, taxonName, regNumber, state); + + if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) { + makeSpecimenTypeData(typeStr, taxonName, regNumber, state); + } else { + makeNameTypeData(typeStr, taxonName, regNumber, state); + } } getTaxonService().save(taxon); + + if(taxonName.getRank().equals(Rank.SPECIES()) || taxonName.getRank().isLower(Rank.SPECIES())){ + // try to find the genus, it should have been imported already, Genera are coming first in the import file + Taxon genus = ((IAPTImportState)state).getGenusTaxonMap().get(taxonName.getGenusOrUninomial()); + if(genus != null){ + higherTaxonNode = genus.getTaxonNodes().iterator().next(); + } else { + logger.info(csvReportLine(regNumber, "Parent genus not found for", nameStr)); + } + } + if(higherTaxonNode != null){ higherTaxonNode.addChildTaxon(taxon, null, null); getTaxonNodeService().save(higherTaxonNode); } - return taxon; + if(taxonName.getRank().isGenus()){ + ((IAPTImportState)state).getGenusTaxonMap().put(taxonName.getGenusOrUninomial(), taxon); + } + return taxon; } - private void makeTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) { + private void makeSpecimenTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) { - Matcher m = typeSplitPattern.matcher(typeStr); + Matcher m = typeSpecimenSplitPattern.matcher(typeStr); if(m.matches()){ String fieldUnitStr = m.group(TypesName.fieldUnit.name()); @@ -295,6 +380,7 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state); if(fieldUnit == null) { // create a field unit with only a titleCache using the fieldUnitStr substring + logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr)); fieldUnit = FieldUnit.NewInstance(); fieldUnit.setTitleCache(fieldUnitStr, true); getOccurrenceService().save(fieldUnit); @@ -310,11 +396,81 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp FieldUnit fieldUnit = FieldUnit.NewInstance(); fieldUnit.setTitleCache(typeStr, true); getOccurrenceService().save(fieldUnit); - logger.warn(csvReportLine(regNumber, "Type field can not be parsed", typeStr)); + logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr)); } getNameService().save(taxonName); } + private void makeNameTypeData(String typeStr, BotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) { + + String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", ""); + if(nameStr.isEmpty()) { + return; + } + + String basionymNameStr = null; + String noteStr = null; + String agentStr = null; + + Matcher m; + + if(typeStr.startsWith("not to be indicated")){ + // Special case: + // Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez + // FIXME + m = typeNameSpecialSplitPattern.matcher(nameStr); + if(m.matches()){ + nameStr = m.group("name"); + noteStr = m.group("note"); + agentStr = m.group("agent"); + // TODO better import of agent? + if(agentStr != null){ + noteStr = noteStr + ": " + agentStr; + } + } + } else { + // Generic case + m = typeNameBasionymPattern.matcher(nameStr); + if (m.find()) { + basionymNameStr = m.group("basionymName"); + if (basionymNameStr != null) { + nameStr = nameStr.replace(m.group(0), ""); + } + } + + m = typeNameNotePattern.matcher(nameStr); + if (m.find()) { + noteStr = m.group(1); + if (noteStr != null) { + nameStr = nameStr.replace(m.group(0), ""); + } + } + } + + BotanicalName typeName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null); + + if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) { + logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr)); + } + + if(basionymNameStr != null){ + BotanicalName basionymName = (BotanicalName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null); + getNameService().save(basionymName); + typeName.addBasionym(basionymName); + } + + + NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance(); + nameTypeDesignation.setTypeName(typeName); + getNameService().save(typeName); + + if(noteStr != null){ + nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE())); + } + taxonName.addNameTypeDesignation(typeName, null, null, null, null, false); + + } + /** * Currently only parses the collector, fieldNumber and the collection date. * @@ -329,20 +485,27 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp Matcher m1 = collectorPattern.matcher(fieldUnitStr); if(m1.matches()){ - String collectionData = m1.group(1); // like (leg. Metzeltin, 30. 9. 1996) - if(collectionData == null){ - collectionData = m1.group(2); // like leg. Metzeltin, 30. 9. 1996 + + String collectorData = m1.group(2); // like (leg. Metzeltin, 30. 9. 1996) + String removal = m1.group(1); + if(collectorData == null){ + collectorData = m1.group(4); // like leg. Metzeltin, 30. 9. 1996 + removal = m1.group(3); } - if(collectionData == null){ + if(collectorData == null){ return null; } + // the fieldUnitStr is parsable + // remove all collectorData from the fieldUnitStr and use the rest as locality + String locality = fieldUnitStr.replace(removal, ""); + String collectorStr = null; String detailStr = null; Partial date = null; String fieldNumber = null; - Matcher m2 = collectionDataPattern.matcher(collectionData); + Matcher m2 = collectionDataPattern.matcher(collectorData); if(m2.matches()){ collectorStr = m2.group("collector"); detailStr = m2.group("detail"); @@ -361,40 +524,44 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp } if(date == null && fieldNumber == null){ // detailed parsing not possible, so need fo fallback - collectorStr = collectionData; + collectorStr = collectorData; } } - if(collectorStr != null) { - fieldUnit = FieldUnit.NewInstance(); - GatheringEvent ge = GatheringEvent.NewInstance(); + if(collectorStr == null) { + collectorStr = collectorData; + } - TeamOrPersonBase agent = state.getAgentBase(collectorStr); - if(agent == null) { - agent = Person.NewTitledInstance(collectorStr); - getAgentService().save(agent); - state.putAgentBase(collectorStr, agent); - } - ge.setCollector(agent); + fieldUnit = FieldUnit.NewInstance(); + GatheringEvent ge = GatheringEvent.NewInstance(); + ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE())); - if(date != null){ - ge.setGatheringDate(date); - } + TeamOrPersonBase agent = state.getAgentBase(collectorStr); + if(agent == null) { + agent = Person.NewTitledInstance(collectorStr); + getAgentService().save(agent); + state.putAgentBase(collectorStr, agent); + } + ge.setCollector(agent); - getEventBaseService().save(ge); - fieldUnit.setGatheringEvent(ge); + if(date != null){ + ge.setGatheringDate(date); + } - if(fieldNumber != null) { - fieldUnit.setFieldNumber(fieldNumber); - } - getOccurrenceService().save(fieldUnit); + getEventBaseService().save(ge); + fieldUnit.setGatheringEvent(ge); + + if(fieldNumber != null) { + fieldUnit.setFieldNumber(fieldNumber); } + getOccurrenceService().save(fieldUnit); + } return fieldUnit; } - private Partial parseDate(String regNumber, String dateStr) { + protected Partial parseDate(String regNumber, String dateStr) { Partial pupDate = null; boolean parseError = false; @@ -539,11 +706,12 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp * @param regNumber * @return */ - private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) { + protected DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) { DerivedUnit specimen = null; String collectionCode = null; + String collectionTitle = null; String subCollectionStr = null; String instituteStr = null; String accessionNumber = null; @@ -571,21 +739,23 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp for (Pattern p : specimenTypePatterns) { Matcher m = p.matcher(text); if (m.matches()) { - // collection code is mandatory + // collection code or collectionTitle is mandatory try { collectionCode = m.group("colCode"); } catch (IllegalArgumentException e){ // match group colCode not found } + try { - subCollectionStr = m.group("subCollection"); + instituteStr = m.group("institute"); } catch (IllegalArgumentException e){ - // match group subCollection not found + // match group col_name not found } + try { - instituteStr = m.group("institute"); + subCollectionStr = m.group("subCollection"); } catch (IllegalArgumentException e){ - // match group col_name not found + // match group subCollection not found } try { accessionNumber = m.group("accNumber"); @@ -614,7 +784,7 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp } if(collectionCode == null && instituteStr == null){ - logger.warn(csvReportLine(regNumber, "neither 'collectionCode' nor 'institute' found in ", text)); + logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text)); continue; } collection = getCollection(collectionCode, instituteStr, subCollectionStr); @@ -624,10 +794,10 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp } } if(specimen == null) { - logger.warn(csvReportLine(regNumber, "Could not parse specimen fieldUnit", typeName.name().toString(), text)); + logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text)); } if(unusualAccessionNumber){ - logger.warn(csvReportLine(regNumber, "Unusual accession number", typeName.name().toString(), text, accessionNumber)); + logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber)); } return specimen; } @@ -653,7 +823,9 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){ nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL()); titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim(); - authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim(); + if(authorStr != null) { + authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim(); + } } // parse the full taxon name @@ -716,8 +888,12 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp for(String text : nameAnnotations.keySet()){ taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT())); } - getNameService().save(taxonName); } + + taxonName.addSource(OriginalSourceType.Import, regNumber, null, state.getConfig().getSourceReference(), null); + + getNameService().save(taxonName); + return taxonName; } @@ -780,7 +956,9 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp collection.setSuperCollection(superCollection); } collectionMap.put(key, collection); - getCollectionService().save(collection); + if(!_testMode) { + getCollectionService().save(collection); + } } return collection; @@ -860,6 +1038,15 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp value = StringUtils.replace(value, "c$k", "č"); value = StringUtils.replace(value, " U$K", " Š"); + value = StringUtils.replace(value, "O>U>!", "Ø"); + value = StringUtils.replace(value, "o>!", "ø"); + value = StringUtils.replace(value, "S$K", "Ŝ"); + value = StringUtils.replace(value, ">l", "ğ"); + + value = StringUtils.replace(value, "§B>i", "ł"); + + + return value; } @@ -902,6 +1089,7 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp ((IAPTImportState)state).setCurrentTaxon(taxon); + logger.info("#of imported Genera: " + ((IAPTImportState) state).getGenusTaxonMap().size()); return; } @@ -949,9 +1137,9 @@ public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends Simp } else if(name.matches("^Incertae sedis$|^No group assigned$")){ return rankFamilyIncertisSedis(); } else if(name.matches(".*phyta$|.*mycota$")){ - return Rank.SECTION_BOTANY(); + return Rank.PHYLUM(); } else if(name.matches(".*phytina$|.*mycotina$")){ - return Rank.SUBSECTION_BOTANY(); + return Rank.SUBPHYLUM(); } else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones return rankUnrankedSupraGeneric(); } else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){