From: Andreas Müller
Date: Thu, 14 Jan 2016 15:33:04 +0000 (+0100)
Subject: Further fix right quotation mark parsing
X-Git-Tag: 4.0.0^2~212
X-Git-Url: https://dev.e-taxonomy.eu/gitweb/cdmlib.git/commitdiff_plain/c0e106fe949ba0e81550cdb05f12fbd8aa43e23e

Further fix right quotation mark parsing
---

diff --git a/cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java b/cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java
index e9b65ebe63..b8cc084564 100644
--- a/cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java
+++ b/cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java
@@ -1,35 +1,37 @@
-/**
- *
- */
-package eu.etaxonomy.cdm.common;
-
-/**
- * This class is a constant holder for commonly used UTF-8 characters.
- *
- * @author a.mueller
- * @since 19.06.2013
- */
-public enum UTF8 {
-
-
-    EN_DASH("\u2013"), // https://de.wikipedia.org/wiki/Halbgeviertstrich
-    SPATIUM("\u202F"), //very short non-breaking space
-    EN_DASH_SPATIUM("\u202F\u2013\u202F"),
-    HYBRID ("\u00D7"), // hybrid sign
-    SHARP_S("\u00DF"),
-    NO_BREAK_SPACE("\u00A0"),
-    POLISH_L("\u0142"),
-    SMALL_A_ACUTE("\u00E1")
-    ;
-
-    private String value;
-
-    private UTF8(String value) {
-        this.value = value;
-    }
-
-    public String toString(){
-        return value;
-    }
-
-}
+/**
+ *
+ */
+package eu.etaxonomy.cdm.common;
+
+/**
+ * This class is a constant holder for commonly used UTF-8 characters.
+ *
+ * @author a.mueller
+ * @since 19.06.2013
+ */
+public enum UTF8 {
+
+
+    EN_DASH("\u2013"), // https://de.wikipedia.org/wiki/Halbgeviertstrich
+    SPATIUM("\u202F"), //very short non-breaking space
+    EN_DASH_SPATIUM("\u202F\u2013\u202F"),
+    HYBRID ("\u00D7"), // hybrid sign
+    SHARP_S("\u00DF"),
+    NO_BREAK_SPACE("\u00A0"),
+    POLISH_L("\u0142"),
+    SMALL_A_ACUTE("\u00E1"),
+    RIGHT_SINGLE_QUOT("\u2019") // Right single quotation mark
+    ;
+
+    private String value;
+
+    private UTF8(String value) {
+        this.value = value;
+    }
+
+    @Override
+    public String toString(){
+        return value;
+    }
+
+}
diff --git a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
index 599afcd25d..abb7171a07 100644
--- a/cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
+++ b/cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
@@ -84,7 +84,8 @@ public abstract class NonViralNameParserImplRegExBase {


     //AuthorString
-    protected static String authorPart = "(" + "(O[’']|d[’']|D[’']|L[’']|'t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ;
+    protected static String qm = "[" + UTF8.RIGHT_SINGLE_QUOT + "']";
+    protected static String authorPart = "(" + "([OdDL]"+qm+"|[’']t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ;
     protected static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f\\.|fil\\.|secundus)?" ;
     protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
     protected static String notFinalTeamSplitter = "(?:" + fWs + "," + fWs + "|" + finalTeamSplitter + ")";