Revision c0e106fe
Added by Andreas Müller over 8 years ago
cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java | ||
---|---|---|
1 |
/** |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.etaxonomy.cdm.common; |
|
5 |
|
|
6 |
/**
 * Constant holder for frequently used non-ASCII Unicode characters (and short
 * character sequences), each kept as a Java {@link String}.
 * <p>
 * {@link #toString()} returns the character(s) themselves rather than the
 * constant name, so a constant can be used directly in string concatenation,
 * e.g. {@code "a" + UTF8.HYBRID + "b"}.
 *
 * @author a.mueller
 * @since 19.06.2013
 */
public enum UTF8 {

    /** En dash (U+2013), see https://de.wikipedia.org/wiki/Halbgeviertstrich */
    EN_DASH("\u2013"),

    /** Very short non-breaking space (U+202F). */
    SPATIUM("\u202F"),

    /** En dash enclosed by a very short non-breaking space on each side. */
    EN_DASH_SPATIUM("\u202F\u2013\u202F"),

    /** Hybrid sign (U+00D7). */
    HYBRID("\u00D7"),

    /** Sharp s (U+00DF). */
    SHARP_S("\u00DF"),

    /** No-break space (U+00A0). */
    NO_BREAK_SPACE("\u00A0"),

    /** Polish l (U+0142). */
    POLISH_L("\u0142"),

    /** Small a with acute accent (U+00E1). */
    SMALL_A_ACUTE("\u00E1");

    /** The character(s) represented by this constant; never reassigned. */
    private final String value;

    /**
     * @param value the character(s) this constant stands for
     */
    UTF8(String value) {
        this.value = value;
    }

    /**
     * Returns the represented character(s), not the constant's name.
     *
     * @return the string passed to the constructor
     */
    @Override
    public String toString() {
        return value;
    }

}
|
1 |
/** |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.etaxonomy.cdm.common; |
|
5 |
|
|
6 |
/**
 * Constant holder for frequently used non-ASCII Unicode characters (and short
 * character sequences), each kept as a Java {@link String}.
 * <p>
 * {@link #toString()} returns the character(s) themselves rather than the
 * constant name, so a constant can be used directly in string concatenation,
 * e.g. when assembling a regular expression.
 *
 * @author a.mueller
 * @since 19.06.2013
 */
public enum UTF8 {

    /** En dash (U+2013), see https://de.wikipedia.org/wiki/Halbgeviertstrich */
    EN_DASH("\u2013"),

    /** Very short non-breaking space (U+202F). */
    SPATIUM("\u202F"),

    /** En dash enclosed by a very short non-breaking space on each side. */
    EN_DASH_SPATIUM("\u202F\u2013\u202F"),

    /** Hybrid sign (U+00D7). */
    HYBRID("\u00D7"),

    /** Sharp s (U+00DF). */
    SHARP_S("\u00DF"),

    /** No-break space (U+00A0). */
    NO_BREAK_SPACE("\u00A0"),

    /** Polish l (U+0142). */
    POLISH_L("\u0142"),

    /** Small a with acute accent (U+00E1). */
    SMALL_A_ACUTE("\u00E1"),

    /** Right single quotation mark (U+2019). */
    RIGHT_SINGLE_QUOT("\u2019");

    /** The character(s) represented by this constant; never reassigned. */
    private final String value;

    /**
     * @param value the character(s) this constant stands for
     */
    UTF8(String value) {
        this.value = value;
    }

    /**
     * Returns the represented character(s), not the constant's name.
     *
     * @return the string passed to the constructor
     */
    @Override
    public String toString() {
        return value;
    }

}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java | ||
---|---|---|
84 | 84 |
|
85 | 85 |
|
86 | 86 |
//AuthorString |
87 |
// Previous definition of the author-part pattern (the line this revision replaces).
// Structure: an optional prefix (O, d, D or L followed by an ASCII or typographic
// apostrophe, or 't, "ten ", "le ", "zur "), then capital2charDotWord or "DC.",
// optionally followed by ' + nonCapitalDotWord; alternatively one of the
// particles van/von/Van/Von (optionally " der"), da, du, de, den, del, "de la".
// NOTE(review): the "||" between "ten\\s" and "le\\s" adds an empty alternative;
// the 't prefix accepts only the ASCII apostrophe here.
protected static String authorPart = "(" + "(O[’']|d[’']|D[’']|L[’']|'t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ;
|
87 |
protected static String qm = "[" + UTF8.RIGHT_SINGLE_QUOT + "']"; |
|
88 |
protected static String authorPart = "(" + "([OdDL]"+qm+"|[’']t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ; |
|
88 | 89 |
protected static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f\\.|fil\\.|secundus)?" ; |
89 | 90 |
protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")"; |
90 | 91 |
protected static String notFinalTeamSplitter = "(?:" + fWs + "," + fWs + "|" + finalTeamSplitter + ")"; |
Also available in: Unified diff
Further fix right quotation mark parsing