Project

General

Profile

« Previous | Next » 

Revision c0e106fe

Added by Andreas Müller over 8 years ago

Further fix right quotation mark parsing

View differences:

cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/UTF8.java
1
/**
2
 * 
3
 */
4
package eu.etaxonomy.cdm.common;
5

  
6
/**
7
 * This enum is a constant holder for commonly used non-ASCII Unicode characters.
8
 *  
9
 * @author a.mueller
10
 * @since 19.06.2013
11
 */
12
public enum UTF8 {
13
	
14
	
15
	EN_DASH("\u2013"),   // https://de.wikipedia.org/wiki/Halbgeviertstrich
16
	SPATIUM("\u202F"),   //very short non-breaking space
17
	EN_DASH_SPATIUM("\u202F\u2013\u202F"),
18
	HYBRID ("\u00D7"),   // hybrid sign
19
	SHARP_S("\u00DF"),
20
	NO_BREAK_SPACE("\u00A0"),
21
	POLISH_L("\u0142"),
22
	SMALL_A_ACUTE("\u00E1")
23
	;
24

  
25
	private String value;
26
	
27
	private UTF8(String value) {
28
		this.value = value;
29
	}
30
	
31
	public String toString(){
32
		return value;
33
	}
34
	
35
}
1
/**
2
 *
3
 */
4
package eu.etaxonomy.cdm.common;
5

  
6
/**
7
 * This enum is a constant holder for commonly used non-ASCII Unicode characters.
8
 *
9
 * @author a.mueller
10
 * @since 19.06.2013
11
 */
12
public enum UTF8 {
13

  
14

  
15
	EN_DASH("\u2013"),   // https://de.wikipedia.org/wiki/Halbgeviertstrich
16
	SPATIUM("\u202F"),   //very short non-breaking space
17
	EN_DASH_SPATIUM("\u202F\u2013\u202F"),
18
	HYBRID ("\u00D7"),   // hybrid sign
19
	SHARP_S("\u00DF"),
20
	NO_BREAK_SPACE("\u00A0"),
21
	POLISH_L("\u0142"),
22
	SMALL_A_ACUTE("\u00E1"),
23
	RIGHT_SINGLE_QUOT("\u2019") // Right single quotation mark
24
	;
25

  
26
	private String value;
27

  
28
	private UTF8(String value) {
29
		this.value = value;
30
	}
31

  
32
	@Override
33
    public String toString(){
34
		return value;
35
	}
36

  
37
}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
84 84

  
85 85

  
86 86
    //AuthorString
87
	// Earlier form of the author-part pattern (the removed side of this diff).
	// Structure: an optional prefix (O, d, D or L followed by ’ or ', or one of "'t", "ten ", "le ", "zur "),
	// then capital2charDotWord or "DC" plus one arbitrary character (the dot is unescaped),
	// then an optional apostrophe + nonCapitalDotWord; alternatively one of the particles
	// van/von/Van/Von (optionally followed by " der"), da, du, de, den, del, "de la".
	// The literal ’ characters make the pattern depend on the encoding the source file is read with.
	protected static String authorPart = "(" + "(O[’']|d[’']|D[’']|L[’']|'t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ;
87
    // Character class matching a single quotation mark as used in author names:
    // either the right single quotation mark supplied by UTF8.RIGHT_SINGLE_QUOT or the plain ASCII apostrophe.
    protected static String qm = "[" + UTF8.RIGHT_SINGLE_QUOT + "']";
88
    // Pattern for one part of an author name.
    // Structure: an optional prefix (O, d, D or L followed by a quotation mark, or quotation mark + "t",
    // or one of "ten ", "le ", "zur "), then capital2charDotWord or "DC" plus one character,
    // then an optional apostrophe + nonCapitalDotWord; alternatively one of the particles
    // van/von/Van/Von (optionally followed by " der"), da, du, de, den, del, "de la".
    // Every quotation-mark position in the prefix uses qm, so no literal non-ASCII character
    // remains in this line and the pattern no longer depends on the source-file encoding.
    // NOTE(review): "DC." has an unescaped dot and the prefix group contains an empty alternative ("||");
    // both are kept unchanged here - confirm whether they are intended.
    protected static String authorPart = "(" + "([OdDL]" + qm + "|" + qm + "t|ten\\s||le\\s|zur\\s)?" + "(" + capital2charDotWord + "|DC.)" + "('" + nonCapitalDotWord + ")?" + "|[vV][ao]n(\\sder)?|da|du|de(n|l|\\sla)?)" ;
88 89
    // Pattern for a single author: one or more authorPart occurrences, each followed by fWs or a hyphen,
    // then an optional suffix "f.", "fil." or "secundus".
    // fWs is defined outside this excerpt (presumably mandatory whitespace - confirm).
    protected static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f\\.|fil\\.|secundus)?" ;
89 90
    // Separator before the last member of an author team: "&" surrounded by fWs, or "et" surrounded by oWs.
    // fWs and oWs are defined outside this excerpt (presumably mandatory / optional whitespace - confirm).
    protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
90 91
    // Separator between any two team members, as a non-capturing group:
    // a comma surrounded by fWs, or anything matched by finalTeamSplitter.
    protected static String notFinalTeamSplitter = "(?:" + fWs + "," + fWs + "|" + finalTeamSplitter + ")";

Also available in: Unified diff