1
|
/**
|
2
|
*
|
3
|
*/
|
4
|
package eu.etaxonomy.cdm.common;
|
5
|
|
6
|
/**
 * Constant holder for commonly used Unicode characters and short character
 * sequences (dashes, spaces, quotation marks, accented letters, symbols).
 * <p>
 * Each constant wraps a {@link String}; {@link #toString()} returns that string,
 * so constants can be used directly in string concatenation.
 *
 * @author a.mueller
 * @since 19.06.2013
 */
public enum UTF8 {

    HYPHEN("\u2010"),                       // hyphen https://www.fileformat.info/info/unicode/char/2010/index.htm
    HYPHEN_NO_BREAK("\u2011"),              // non breaking hyphen https://www.fileformat.info/info/unicode/char/2011/index.htm
    FIGURE_DASH("\u2012"),                  // figure dash https://www.fileformat.info/info/unicode/char/2012/index.htm
    EN_DASH("\u2013"),                      // https://de.wikipedia.org/wiki/Halbgeviertstrich
    EM_DASH("\u2014"),                      // https://de.wikipedia.org/wiki/Geviertstrich
    BAR_HORIZON("\u2015"),                  // horizontal bar https://www.fileformat.info/info/unicode/char/2015/index.htm
    EM_DASH_DOUBLE("\u2E3A"),               // https://de.wikipedia.org/wiki/Doppelgeviertstrich
    SPATIUM("\u202F"),                      // very short non-breaking space
    EN_DASH_SPATIUM("\u202F\u2013\u202F"),  // en dash surrounded by narrow non-breaking spaces
    HYBRID("\u00D7"),                       // hybrid sign (multiplication sign)
    SHARP_S("\u00DF"),
    a_UMLAUT("\u00E4"),                     // small a umlaut, latin small letter a with diaeresis
    O_UMLAUT("\u00F6"),                     // small o umlaut, latin small letter o with diaeresis
    U_UMLAUT("\u00FC"),                     // small u umlaut, latin small letter u with diaeresis
    SMALL_O_WITH_STROKE("\u00F8"),          // "Danish" o
    NO_BREAK_SPACE("\u00A0"),
    POLISH_L("\u0142"),
    SMALL_A_ACUTE("\u00E1"),
    SMALL_O_ACUTE("\u00F3"),
    SMALL_E_ACUTE("\u00E9"),
    // Circumflex accent (U+005E), used in regular expressions for negation, e.g. "not a": [^a].
    // Must be the character itself; the plain text "U+005E" would not negate anything.
    REGEX_NOT("\u005E"),
    QUOT_SINGLE_RIGHT("\u2019"),            // right single quotation mark
    QUOT_SINGLE_HIGH_REV9("\u201b"),        // single high-reversed-9 quotation mark
    QUOT_DBL_LEFT("\u201c"),                // LEFT DOUBLE QUOTATION MARK, left English quotation mark
    QUOT_DBL_RIGHT("\u201d"),               // RIGHT DOUBLE QUOTATION MARK, right English quotation mark
    QUOT_DBL_LOW9("\u201e"),                // DOUBLE LOW-9 QUOTATION MARK
    QUOT_DBL_HIGH_REV9("\u201f"),           // DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    ACUTE_ACCENT("\u00B4"),                 // acute accent, looks a bit similar to the single quotation mark
    BLACK_CIRCLE("\u25CF"),                 // black circle, symbol for endemic
    DEGREE_SIGN("\u00B0"),                  // degree sign
    NARROW_NO_BREAK("\u202F")               // narrow no-break space, same character as SPATIUM
    ;

    /** The character sequence represented by this constant; never reassigned. */
    private final String value;

    private UTF8(String value) {
        this.value = value;
    }

    /**
     * Builds a regular expression that matches a single dash-like character,
     * optionally preceded and/or followed by one {@link #SPATIUM}.
     * <p>
     * The accepted dash characters are the ASCII hyphen-minus and
     * {@link #HYPHEN}, {@link #HYPHEN_NO_BREAK}, {@link #FIGURE_DASH},
     * {@link #EN_DASH}, {@link #EM_DASH}, {@link #BAR_HORIZON} and
     * {@link #EM_DASH_DOUBLE}.
     *
     * @return the regular expression as a string, suitable for
     *         {@code java.util.regex.Pattern.compile(String)}
     */
    public static String ANY_DASH_RE(){
        String optionalSpatium = SPATIUM + "?";
        // "\\-" puts an escaped ASCII hyphen-minus first in the character class
        String dashClass = "[\\-"
                + HYPHEN + HYPHEN_NO_BREAK + FIGURE_DASH
                + EN_DASH + EM_DASH + BAR_HORIZON + EM_DASH_DOUBLE
                + "]";
        return optionalSpatium + dashClass + optionalSpatium;
    }

    /**
     * Returns the character sequence of this constant (not the constant's name).
     *
     * @return the wrapped string
     */
    @Override
    public String toString(){
        return value;
    }

}
|