From f4ce5055dcd11bc7619c00cc17d615f48d524066 Mon Sep 17 00:00:00 2001 From: Andreas Kohlbecker Date: Thu, 15 Sep 2016 17:51:05 +0200 Subject: [PATCH] ref #6026 improving specimenType parsers: better recognition of 'Coll. something' types --- .../java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java | 6 +++--- .../test/java/eu/etaxonomy/cdm/io/iapt/IAPTImportTest.java | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java b/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java index 3e32113a..80d98add 100644 --- a/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java +++ b/app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java @@ -95,7 +95,7 @@ public class IAPTExcelImport extends Simp Pattern.compile("^(?[IVX]{1,2})([\\.\\-/])(?(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969 Pattern.compile("^(?[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?\\p{L}+\\.?),?\\s?(?(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999 }; - private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?.*?))(?:[Hh]olotype:(?.*?)\\.?)?(?:[Ii]sotype[^:]*:(?.*)\\.?)?\\.?$"); + private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?.*?))(?:[Hh]olotype:(?.*?)\\.?)?(?:[Ii]sotype.*?[:\\(](?.*)\\.?)?\\.?$"); private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?[^\\)]*).*$"); private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]' @@ -111,8 +111,8 @@ public class IAPTExcelImport extends Simp private static final Pattern[] specimenTypePatterns = new Pattern[]{ Pattern.compile("^(?[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?.*[^\\)])\\))(?.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222 Pattern.compile("^(?[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?[^\\.,;]*)(.))(?.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78 - Pattern.compile("^(?Coll\\.\\s.*?)\\s+(?(Praep|slide).*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 - // Pattern.compile("^.*(?Praep.*)$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 + Pattern.compile("^(?:in\\s)?(?[Cc]oll\\.\\s.*?)(?:\\s+(?(Praep\\.|slide|No\\.|Inv\\. Nr\\.|Nr\\.).*))?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62 + Pattern.compile("^(?Inst\\.\\s.*?)\\s+(?N\\s.*)?$"), // like Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A Pattern.compile("^(?[A-Z]+)(?:\\s+(?.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any }; diff --git a/app-import/src/test/java/eu/etaxonomy/cdm/io/iapt/IAPTImportTest.java b/app-import/src/test/java/eu/etaxonomy/cdm/io/iapt/IAPTImportTest.java index 2bdc5f0e..2089bf92 100644 --- a/app-import/src/test/java/eu/etaxonomy/cdm/io/iapt/IAPTImportTest.java +++ b/app-import/src/test/java/eu/etaxonomy/cdm/io/iapt/IAPTImportTest.java @@ -65,6 +65,13 @@ public class IAPTImportTest { "KASSEL Coll. Krasske, Praep. DII 78", "Coll. Lange-Bertalot, Botanisches Institut, Frankfurt am Main slide Eh-B 91", "Coll. Østrup, Botan. Museum Copenhagen, Dänemark Praep. 3944", + "Coll. L.P.B.V. No. 0736", + "Coll. Ruhr University-Bochum, Inst. of Geology No. 11532", + "Coll. Paläontol. Inst. Univ. Bucuresti. Nr. 2515", + "Coll. Dr.h.c. R. Mundlos (Bad Friedrichshall, später Stuttgart) Inv. Nr. P 1396", + "Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A", + "Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany", + "in coll. H. F. Paulus (Wien)", }; -- 2.34.1