Project

General

Profile

« Previous | Next » 

Revision 4f5219a9

Added by Andreas Müller almost 8 years ago

#5909 Improve referenced name parser

View differences:

cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImpl.java
535 535
			if (bookMatcher.matches() ){
536 536
				result = parseBook(strReference);
537 537
			}else{
538
				logger.warn("Non-InRef must be book but does not match book");
538
				logger.warn("Non-InRef must be book but does not match book: "+ strReference);
539 539
				result = ReferenceFactory.newBook();
540 540
				makeUnparsableRefTitle(result, strReference);
541 541
			}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
45 45
    protected static String nonCapitalWord = "\\p{javaLowerCase}+";
46 46
    protected static String word = "(" + capitalWord + "|" + nonCapitalWord + ")"; //word (capital or non-capital) with no '.' at the end
47 47
    protected static String uppercaseWord = "\\p{javaUpperCase}{2,}";
48
    protected static String apostrophWord = word + "('\\p{javaLowerCase}*)?";
48 49

  
49 50
    protected static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultativ '.' at the end
50 51
    protected static String capital2charDotWord = "(" + capital2LetterWord + "\\.?|\\p{javaUpperCase}\\.)"; //capitalWord with facultativ '.' but minimum 2 characters (single capital word like 'L' is not allowed
52
    protected static String twoCapitalDotWord = "\\p{javaUpperCase}{2}\\.";   //e.g. NY.
53

  
51 54
    protected static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultativ '.' at the end
52 55
    protected static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
53 56
    protected static String obligateDotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.+"; //word (capital or non-capital) with obligate '.' at the end
......
127 130
    protected static String pBracketNr = "\\[" + nr4 + "\\]";
128 131
    protected static String pFolBracket = "\\[fol\\." + fWs + "\\d{1,2}(-\\d{1,2})?\\]";
129 132

  
130
    protected static String pStrTab = "tab\\." + fWs + nr4 + "(" + fWs + "(B|\u00DF|\\(\\d{1,3}\\)))?";
131
    protected static String pFig = "fig\\." + fWs + nr4 + "[a-z]?";
132
    protected static String pFigs = pFig + "(-" + nr4 + ")?";
133
    protected static String pStrTab = "[tT]((ab)?\\.|ab\\s)" + fWs + nr4 + "(" + fWs + "(B|\u00DF|\\(\\d{1,3}\\)))?";
134
    protected static String pFig = "[fF]((ig)?\\.|ig\\s)" + fWs + nr4 + "([a-zA-Z]([-\u2013,]\\s*[a-zA-Z])?)?";
135
    protected static String pFigs = pFig + "([-\u2013]" + nr4 + ")?";
136
    protected static String pPlate = "[pP]((l)?\\.|l\\s)" + fWs + nr4 + "([a-zA-Z]([-\u2013,]\\s*[a-zA-Z])?)?";
137

  
138

  
133 139
    //static String pTabFig = pStrTab + "(," + fWs + pFigs + ")?";
134
    protected static String pTabFig = "(" + pStrTab + "|" + pFigs + ")";
140
    protected static String pTabFigPl = "(" + pStrTab + "|" + pFigs + "|" +  pPlate + ")";
135 141

  
136 142
    //e.g.: p455; p.455; pp455-456; pp.455-456; pp.455,456; 455, 456; pages 456-457; pages 456,567
137
    protected static String pSinglePages = "(p\\.?)?" + fWs + pPage + "(," + pTabFig +")?";
138
    protected static String pMultiPages = "(pp\\.?|pages)?" + fWs + pPage + fWs + "(-|,)" +fWs + pPage ;
143
    protected static String pSinglePages = "(p\\.?)?" + fWs + pPage + "(," + pTabFigPl +")?";
144
    protected static String pMultiPages = "(pp\\.?|pages)?" + fWs + pPage + fWs + "[-\u2013,]" +fWs + pPage ;
139 145
    //static String pPages = pPage + "(," + fWs + "(" + pPage + "|" + pTabFig + ")" + ")?";
140 146
    protected static String pPages = "(" + pSinglePages +"|" + pMultiPages +")";
147
    protected static String pPagesTabFig = pPages +"[,\\.]" + fWs + pTabFigPl;
148

  
141 149

  
142 150

  
143 151
    protected static String pCouv = "couv\\." + fWs + "\\d{1,3}";
......
164 172
    protected static String pRomNr = "ljfweffaflas"; //TODO rom number have to be tested first
165 173

  
166 174
    protected static String pDetailAlternatives = "(" + pPages + "|" + pPageSpecial + "|" + pStrNo + "|" + pBracketNr +
167
    			"|" + pTabFig + "|" + pTabSpecial + "|" + pFolBracket + "|" + pCouv + "|" + pRomNr + "|" +
168
    			pSpecialGardDict + "|" + pSpecialDetail + ")";
175
    			"|" + pTabFigPl + "|" + pTabSpecial + "|" + pFolBracket + "|" + pCouv + "|" + pRomNr + "|" +
176
    			pSpecialGardDict + "|" + pSpecialDetail + "|" + pPagesTabFig + ")";
169 177

  
170 178
    protected static String detail = pDetailAlternatives;
171 179

  
172 180
    //reference
173
    protected static String volume = nr4 + "[a-z]?" + "(\\("+ nr4  + "(-"+nr4+")?\\))?";
181
    protected static String volume = nr4 + "[a-z]?" + fWs + "(\\("+ nr4  + "([-\u2013]" + nr4 + ")?\\))?" + "(\\((Suppl|Beibl)\\.\\))?";
174 182
    //this line caused problem https://dev.e-taxonomy.eu/trac/ticket/1556 in its original form: "([\u005E:\\.]" + fWs + ")";
175 183
    protected static String anySepChar = "([\u005E:a-zA-Z]" + fWs + ")"; //all characters except for the detail separator, a stricter version would be [,\\-\\&] and some other characters
176 184
//  protected static String anySepChar = "([,\\-\\&\\.\\+\\']" + fWs + ")";
......
178 186
    protected static int authorSeparatorMaxPosition = 4;  //Author may have a maximum of 4 words
179 187
    protected static String pTitleWordSeparator = "(\\."+ fWs+"|" + oWs + ")";
180 188
    protected static String pSeriesPart = ",?" + fWs + "[sS]er(\\.)?" + oWs + "\\d{1,2},?";
181
    protected static String referenceTitleFirstPart = "(" + word + pTitleWordSeparator + ")";
189

  
190
    protected static String referenceTitleFirstPart = "(" + apostrophWord + pTitleWordSeparator + "|" + twoCapitalDotWord + fWs + ")";
182 191
    protected static String referenceTitle = referenceTitleFirstPart + "*" + "("+ dotWord + "|" + uppercaseWord + "|" + pSeriesPart + ")";  //reference title may have words seperated by whitespace or dot. The last word may not have a whitespace at the end. There must be at least one word
183 192
    protected static String referenceTitleWithSepCharacters = "(((" + referenceTitle +"|\\(.+\\))"  + anySepChar + ")*" + referenceTitle + ")"; //,?
184 193
    //TODO test performance ??
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParser.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.strategy.parser;
10

  
11
import java.text.DateFormat;
12
import java.text.ParsePosition;
13
import java.util.Calendar;
14
import java.util.Date;
15
import java.util.regex.Matcher;
16
import java.util.regex.Pattern;
17

  
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20
import org.joda.time.DateTime;
21
import org.joda.time.DateTimeFieldType;
22
import org.joda.time.Partial;
23

  
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.model.common.TimePeriod;
26

  
27
/**
28
 * Class for parsing all types of date string to TimePeriod
29
 * @author a.mueller
30
 * @created 14-Jul-2013
31
 */
32
public class TimePeriodParser {
33
	private static final Logger logger = Logger.getLogger(TimePeriodParser.class);
34
	
35
	//patter for first year in string;
36
	private static final Pattern firstYearPattern =  Pattern.compile("\\d{4}");
37
	//case "1806"[1807];
38
	private static final Pattern uncorrectYearPatter =  Pattern.compile("\"\\d{4}\"\\s*\\[\\d{4}\\]");
39
	//case fl. 1806 or c. 1806 or fl. 1806?
40
	private static final Pattern prefixedYearPattern =  Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
41
	//standard
42
	private static final Pattern standardPattern =  Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?");
43
	private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}";
44
	private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate);
45
	private static final Pattern dotDatePattern =  Pattern.compile(strDotDatePeriodPattern);
46
	private static final Pattern lifeSpanPattern =  Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern));
47

  
48

  
49
	public static TimePeriod parseString(TimePeriod timePeriod, String periodString){
50
		//TODO move to parser class
51
		//TODO until now only quick and dirty (and partly wrong)
52
		TimePeriod result = timePeriod;
53

  
54
		if(timePeriod == null){
55
			return timePeriod;
56
		}
57

  
58
		if (periodString == null){
59
			return result;
60
		}
61
		periodString = periodString.trim();
62

  
63
		result.setFreeText(null);
64
		Date date;
65

  
66
		//case "1806"[1807];
67
		if (uncorrectYearPatter.matcher(periodString).matches()){
68
			result.setFreeText(periodString);
69
			String realYear = periodString.split("\\[")[1];
70
			realYear = realYear.replace("]", "");
71
			result.setStartYear(Integer.valueOf(realYear));
72
			result.setFreeText(periodString);
73
		//case fl. 1806 or c. 1806 or fl. 1806?
74
		}else if(prefixedYearPattern.matcher(periodString).matches()){
75
			result.setFreeText(periodString);
76
			Matcher yearMatcher = firstYearPattern.matcher(periodString);
77
			yearMatcher.find();
78
			String startYear = yearMatcher.group();
79
			result.setStartYear(Integer.valueOf(startYear));
80
			if (yearMatcher.find()){
81
				String endYear = yearMatcher.group();
82
				result.setEndYear(Integer.valueOf(endYear));
83
			}
84
		}else if (dotDatePattern.matcher(periodString).matches()){
85
			parseDotDatePattern(periodString, result);
86
		}else if (lifeSpanPattern.matcher(periodString).matches()){
87
			parseLifeSpanPattern(periodString, result);
88
		}else if (standardPattern.matcher(periodString).matches()){
89
			parseStandardPattern(periodString, result);
90
//TODO first check ambiguity of parser results e.g. for 7/12/11
91
//			}else if (isDateString(periodString)){
92
//				String[] startEnd = makeStartEnd(periodString);
93
//				String start = startEnd[0];
94
//				DateTime startDateTime = dateStringParse(start, true);
95
//				result.setStart(startDateTime);
96
//				if (startEnd.length > 1){
97
//					DateTime endDateTime = dateStringParse(startEnd[1], true);
98
//					;
99
//					result.setEnd(endDateTime.toLocalDate());
100
//				}
101

  
102
		}else{
103
			result.setFreeText(periodString);
104
		}
105
		return result;
106
	}
107

  
108
	private static boolean isDateString(String periodString) {
109
		String[] startEnd = makeStartEnd(periodString);
110
		String start = startEnd[0];
111
		DateTime startDateTime = dateStringParse(start, true);
112
		if (startDateTime == null){
113
			return false;
114
		}
115
		if (startEnd.length > 1){
116
			DateTime endDateTime = dateStringParse(startEnd[1], true);
117
			if (endDateTime != null){
118
				return true;
119
			}
120
		}
121
		return false;
122
	}
123

  
124

  
125
	/**
126
	 * @param periodString
127
	 * @return
128
	 */
129
	private static String[] makeStartEnd(String periodString) {
130
		String[] startEnd = new String[]{periodString};
131
		if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){
132
			startEnd = periodString.split("-");
133
		}
134
		return startEnd;
135
	}
136

  
137

  
138
	private static DateTime dateStringParse(String string, boolean strict) {
139
		DateFormat dateFormat = DateFormat.getDateInstance();
140
		ParsePosition pos = new ParsePosition(0);
141
		Date a = dateFormat.parse(string, pos);
142
		if (a == null || pos.getIndex() != string.length()){
143
			return null;
144
		}
145
		Calendar cal = Calendar.getInstance();
146
		cal.setTime(a);
147
		DateTime result = new DateTime(cal);
148
		return result;
149
	}
150

  
151

  
152
	/**
153
	 * @param periodString
154
	 * @param result
155
	 */
156
	private static void parseDotDatePattern(String periodString,TimePeriod result) {
157
		String[] dates = periodString.split("-");
158
		Partial dtStart = null;
159
		Partial dtEnd = null;
160

  
161
		if (dates.length > 2 || dates.length <= 0){
162
			logger.warn("More than 1 '-' in period String: " + periodString);
163
			result.setFreeText(periodString);
164
		}else {
165
			try {
166
				//start
167
				if (! StringUtils.isBlank(dates[0])){
168
					dtStart = parseSingleDotDate(dates[0].trim());
169
				}
170

  
171
				//end
172
				if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){
173
					dtEnd = parseSingleDotDate(dates[1].trim());
174
				}
175

  
176
				result.setStart(dtStart);
177
				result.setEnd(dtEnd);
178
			} catch (IllegalArgumentException e) {
179
				//logger.warn(e.getMessage());
180
				result.setFreeText(periodString);
181
			}
182
		}
183
	}
184
	
185
	private static void parseLifeSpanPattern(String periodString, TimePeriod result) {
186
		
187
		try{
188
			String[] years = periodString.split("--");
189
			String start = years[0];
190
			String end = years[1];
191
			
192
			result.setStartYear(Integer.valueOf(start));
193
			result.setEndYear(Integer.valueOf(end));
194
		} catch (Exception e) {
195
			//logger.warn(e.getMessage());
196
			result.setFreeText(periodString);
197
		}
198
	}
199

  
200

  
201
	/**
202
	 * @param periodString
203
	 * @param result
204
	 */
205
	private static void parseStandardPattern(String periodString,
206
			TimePeriod result) {
207
		String[] years = periodString.split("-");
208
		Partial dtStart = null;
209
		Partial dtEnd = null;
210

  
211
		if (years.length > 2 || years.length <= 0){
212
			logger.warn("More than 1 '-' in period String: " + periodString);
213
		}else {
214
			try {
215
				//start
216
				if (! CdmUtils.isEmpty(years[0])){
217
					dtStart = parseSingleDate(years[0].trim());
218
				}
219

  
220
				//end
221
				if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){
222
					years[1] = years[1].trim();
223
					if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){
224
						years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1];
225
					}
226
					dtEnd = parseSingleDate(years[1]);
227
				}
228

  
229
				result.setStart(dtStart);
230
				result.setEnd(dtEnd);
231
			} catch (IllegalArgumentException e) {
232
				//logger.warn(e.getMessage());
233
				result.setFreeText(periodString);
234
			}
235
		}
236
	}
237

  
238
	public static TimePeriod parseString(String strPeriod) {
239
		TimePeriod timePeriod = TimePeriod.NewInstance();
240
		return parseString(timePeriod, strPeriod);
241
	}
242

  
243

  
244
	protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{
245
		//FIXME until now only quick and dirty and incomplete
246
		Partial partial =  new Partial();
247
		singleDateString = singleDateString.trim();
248
		if (CdmUtils.isNumeric(singleDateString)){
249
			try {
250
				Integer year = Integer.valueOf(singleDateString.trim());
251
				if (year < 1000 && year > 2100){
252
					logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
253
				}else if (year < 1700 && year > 2100){
254
					logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
255
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
256
				}else{
257
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
258
				}
259
			} catch (NumberFormatException e) {
260
				logger.debug("Not a Integer format in getCalendar()");
261
				throw new IllegalArgumentException(e);
262
			}
263
		}else{
264
			throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString);
265
		}
266
		return partial;
267

  
268
	}
269

  
270
	protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{
271
		Partial partial =  new Partial();
272
		singleDateString = singleDateString.trim();
273
		String[] split = singleDateString.split("\\.");
274
		int length = split.length;
275
		if (length > 3){
276
			throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString));
277
		}
278
		String strYear = split[split.length-1];
279
		String strMonth = length >= 2? split[split.length-2]: null;
280
		String strDay = length >= 3? split[split.length-3]: null;
281

  
282

  
283
		try {
284
			Integer year = Integer.valueOf(strYear.trim());
285
			Integer month = Integer.valueOf(strMonth.trim());
286
			Integer day = Integer.valueOf(strDay.trim());
287
			if (year < 1000 && year > 2100){
288
				logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
289
			}else if (year < 1700 && year > 2100){
290
				logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
291
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
292
			}else{
293
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
294
			}
295
			if (month != null && month != 0){
296
				partial = partial.with(TimePeriod.MONTH_TYPE, month);
297
			}
298
			if (day != null && day != 0){
299
				partial = partial.with(TimePeriod.DAY_TYPE, day);
300
			}
301
		} catch (NumberFormatException e) {
302
			logger.debug("Not a Integer format somewhere in " + singleDateString);
303
			throw new IllegalArgumentException(e);
304
		}
305
		return partial;
306

  
307
	}
308

  
309
}
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.strategy.parser;
10

  
11
import java.text.DateFormat;
12
import java.text.ParsePosition;
13
import java.util.Calendar;
14
import java.util.Date;
15
import java.util.regex.Matcher;
16
import java.util.regex.Pattern;
17

  
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20
import org.joda.time.DateTime;
21
import org.joda.time.DateTimeFieldType;
22
import org.joda.time.Partial;
23

  
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.common.UTF8;
26
import eu.etaxonomy.cdm.model.common.TimePeriod;
27

  
28
/**
29
 * Class for parsing all types of date string to TimePeriod
30
 * @author a.mueller
31
 * @created 14-Jul-2013
32
 */
33
public class TimePeriodParser {
34
	private static final Logger logger = Logger.getLogger(TimePeriodParser.class);
35

  
36
	//patter for first year in string;
37
	private static final Pattern firstYearPattern =  Pattern.compile("\\d{4}");
38
	//case "1806"[1807];
39
	private static final Pattern uncorrectYearPatter =  Pattern.compile("[\""+UTF8.ENGLISH_QUOT_START+"]\\d{4}[\""+UTF8.ENGLISH_QUOT_END+"]\\s*\\[\\d{4}\\]");
40
	//case fl. 1806 or c. 1806 or fl. 1806?
41
	private static final Pattern prefixedYearPattern =  Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
42
	//standard
43
	private static final Pattern standardPattern =  Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?");
44
	private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}";
45
	private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate);
46
	private static final Pattern dotDatePattern =  Pattern.compile(strDotDatePeriodPattern);
47
	private static final Pattern lifeSpanPattern =  Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern));
48

  
49

  
50
	public static TimePeriod parseString(TimePeriod timePeriod, String periodString){
51
		//TODO move to parser class
52
		//TODO until now only quick and dirty (and partly wrong)
53
		TimePeriod result = timePeriod;
54

  
55
		if(timePeriod == null){
56
			return timePeriod;
57
		}
58

  
59
		if (periodString == null){
60
			return result;
61
		}
62
		periodString = periodString.trim();
63

  
64
		result.setFreeText(null);
65
		Date date;
66

  
67
		//case "1806"[1807];
68
		if (uncorrectYearPatter.matcher(periodString).matches()){
69
			result.setFreeText(periodString);
70
			String realYear = periodString.split("\\[")[1];
71
			realYear = realYear.replace("]", "");
72
			result.setStartYear(Integer.valueOf(realYear));
73
			result.setFreeText(periodString);
74
		//case fl. 1806 or c. 1806 or fl. 1806?
75
		}else if(prefixedYearPattern.matcher(periodString).matches()){
76
			result.setFreeText(periodString);
77
			Matcher yearMatcher = firstYearPattern.matcher(periodString);
78
			yearMatcher.find();
79
			String startYear = yearMatcher.group();
80
			result.setStartYear(Integer.valueOf(startYear));
81
			if (yearMatcher.find()){
82
				String endYear = yearMatcher.group();
83
				result.setEndYear(Integer.valueOf(endYear));
84
			}
85
		}else if (dotDatePattern.matcher(periodString).matches()){
86
			parseDotDatePattern(periodString, result);
87
		}else if (lifeSpanPattern.matcher(periodString).matches()){
88
			parseLifeSpanPattern(periodString, result);
89
		}else if (standardPattern.matcher(periodString).matches()){
90
			parseStandardPattern(periodString, result);
91
//TODO first check ambiguity of parser results e.g. for 7/12/11
92
//			}else if (isDateString(periodString)){
93
//				String[] startEnd = makeStartEnd(periodString);
94
//				String start = startEnd[0];
95
//				DateTime startDateTime = dateStringParse(start, true);
96
//				result.setStart(startDateTime);
97
//				if (startEnd.length > 1){
98
//					DateTime endDateTime = dateStringParse(startEnd[1], true);
99
//					;
100
//					result.setEnd(endDateTime.toLocalDate());
101
//				}
102

  
103
		}else{
104
			result.setFreeText(periodString);
105
		}
106
		return result;
107
	}
108

  
109
	private static boolean isDateString(String periodString) {
110
		String[] startEnd = makeStartEnd(periodString);
111
		String start = startEnd[0];
112
		DateTime startDateTime = dateStringParse(start, true);
113
		if (startDateTime == null){
114
			return false;
115
		}
116
		if (startEnd.length > 1){
117
			DateTime endDateTime = dateStringParse(startEnd[1], true);
118
			if (endDateTime != null){
119
				return true;
120
			}
121
		}
122
		return false;
123
	}
124

  
125

  
126
	/**
127
	 * @param periodString
128
	 * @return
129
	 */
130
	private static String[] makeStartEnd(String periodString) {
131
		String[] startEnd = new String[]{periodString};
132
		if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){
133
			startEnd = periodString.split("-");
134
		}
135
		return startEnd;
136
	}
137

  
138

  
139
	private static DateTime dateStringParse(String string, boolean strict) {
140
		DateFormat dateFormat = DateFormat.getDateInstance();
141
		ParsePosition pos = new ParsePosition(0);
142
		Date a = dateFormat.parse(string, pos);
143
		if (a == null || pos.getIndex() != string.length()){
144
			return null;
145
		}
146
		Calendar cal = Calendar.getInstance();
147
		cal.setTime(a);
148
		DateTime result = new DateTime(cal);
149
		return result;
150
	}
151

  
152

  
153
	/**
154
	 * @param periodString
155
	 * @param result
156
	 */
157
	private static void parseDotDatePattern(String periodString,TimePeriod result) {
158
		String[] dates = periodString.split("-");
159
		Partial dtStart = null;
160
		Partial dtEnd = null;
161

  
162
		if (dates.length > 2 || dates.length <= 0){
163
			logger.warn("More than 1 '-' in period String: " + periodString);
164
			result.setFreeText(periodString);
165
		}else {
166
			try {
167
				//start
168
				if (! StringUtils.isBlank(dates[0])){
169
					dtStart = parseSingleDotDate(dates[0].trim());
170
				}
171

  
172
				//end
173
				if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){
174
					dtEnd = parseSingleDotDate(dates[1].trim());
175
				}
176

  
177
				result.setStart(dtStart);
178
				result.setEnd(dtEnd);
179
			} catch (IllegalArgumentException e) {
180
				//logger.warn(e.getMessage());
181
				result.setFreeText(periodString);
182
			}
183
		}
184
	}
185

  
186
	private static void parseLifeSpanPattern(String periodString, TimePeriod result) {
187

  
188
		try{
189
			String[] years = periodString.split("--");
190
			String start = years[0];
191
			String end = years[1];
192

  
193
			result.setStartYear(Integer.valueOf(start));
194
			result.setEndYear(Integer.valueOf(end));
195
		} catch (Exception e) {
196
			//logger.warn(e.getMessage());
197
			result.setFreeText(periodString);
198
		}
199
	}
200

  
201

  
202
	/**
203
	 * @param periodString
204
	 * @param result
205
	 */
206
	private static void parseStandardPattern(String periodString,
207
			TimePeriod result) {
208
		String[] years = periodString.split("-");
209
		Partial dtStart = null;
210
		Partial dtEnd = null;
211

  
212
		if (years.length > 2 || years.length <= 0){
213
			logger.warn("More than 1 '-' in period String: " + periodString);
214
		}else {
215
			try {
216
				//start
217
				if (! CdmUtils.isEmpty(years[0])){
218
					dtStart = parseSingleDate(years[0].trim());
219
				}
220

  
221
				//end
222
				if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){
223
					years[1] = years[1].trim();
224
					if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){
225
						years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1];
226
					}
227
					dtEnd = parseSingleDate(years[1]);
228
				}
229

  
230
				result.setStart(dtStart);
231
				result.setEnd(dtEnd);
232
			} catch (IllegalArgumentException e) {
233
				//logger.warn(e.getMessage());
234
				result.setFreeText(periodString);
235
			}
236
		}
237
	}
238

  
239
	public static TimePeriod parseString(String strPeriod) {
240
		TimePeriod timePeriod = TimePeriod.NewInstance();
241
		return parseString(timePeriod, strPeriod);
242
	}
243

  
244

  
245
	protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{
246
		//FIXME until now only quick and dirty and incomplete
247
		Partial partial =  new Partial();
248
		singleDateString = singleDateString.trim();
249
		if (CdmUtils.isNumeric(singleDateString)){
250
			try {
251
				Integer year = Integer.valueOf(singleDateString.trim());
252
				if (year < 1000 && year > 2100){
253
					logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
254
				}else if (year < 1700 && year > 2100){
255
					logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
256
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
257
				}else{
258
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
259
				}
260
			} catch (NumberFormatException e) {
261
				logger.debug("Not a Integer format in getCalendar()");
262
				throw new IllegalArgumentException(e);
263
			}
264
		}else{
265
			throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString);
266
		}
267
		return partial;
268

  
269
	}
270

  
271
	protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{
272
		Partial partial =  new Partial();
273
		singleDateString = singleDateString.trim();
274
		String[] split = singleDateString.split("\\.");
275
		int length = split.length;
276
		if (length > 3){
277
			throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString));
278
		}
279
		String strYear = split[split.length-1];
280
		String strMonth = length >= 2? split[split.length-2]: null;
281
		String strDay = length >= 3? split[split.length-3]: null;
282

  
283

  
284
		try {
285
			Integer year = Integer.valueOf(strYear.trim());
286
			Integer month = Integer.valueOf(strMonth.trim());
287
			Integer day = Integer.valueOf(strDay.trim());
288
			if (year < 1000 && year > 2100){
289
				logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
290
			}else if (year < 1700 && year > 2100){
291
				logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
292
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
293
			}else{
294
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
295
			}
296
			if (month != null && month != 0){
297
				partial = partial.with(TimePeriod.MONTH_TYPE, month);
298
			}
299
			if (day != null && day != 0){
300
				partial = partial.with(TimePeriod.DAY_TYPE, day);
301
			}
302
		} catch (NumberFormatException e) {
303
			logger.debug("Not a Integer format somewhere in " + singleDateString);
304
			throw new IllegalArgumentException(e);
305
		}
306
		return partial;
307

  
308
	}
309

  
310
}
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplTest.java
1694 1694

  
1695 1695
	}
1696 1696

  
1697
    @Test
1698
    public final void testExistingProblems(){
1699
        //Canabio, issue with space
1700
        NonViralName<?> name = parser.parseReferencedName("Machaonia erythrocarpa var. hondurensis (Standl.) Borhidi"
1701
                + " in Acta Bot. Hung. 46 (1-2): 30. 2004");
1702
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1703
        TeamOrPersonBase<?> combinationAuthor = name.getCombinationAuthorship();
1704
        assertEquals( "Borhidi", combinationAuthor.getNomenclaturalTitle());
1705
        Reference nomRef = (Reference)name.getNomenclaturalReference();
1706
        assertEquals(ReferenceType.Article, nomRef.getType());
1707
        assertEquals("46 (1-2)", nomRef.getVolume());
1708

  
1709
        //Canabio, detail with fig.
1710
        name = parser.parseReferencedName("Didymaea floribunda Rzed."
1711
                + " in Bol. Soc. Bot. Méx. 44: 72, fig. 1. 1983");
1712
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1713
        combinationAuthor = name.getCombinationAuthorship();
1714
        assertEquals( "Rzed.", combinationAuthor.getNomenclaturalTitle());
1715
        nomRef = (Reference)name.getNomenclaturalReference();
1716
        assertEquals(ReferenceType.Article, nomRef.getType());
1717
        assertEquals("44", nomRef.getVolume());
1718
        assertEquals("72, fig. 1", name.getNomenclaturalMicroReference());
1719

  
1720
        //fig with a-c and without dot
1721
        name = parser.parseReferencedName("Deppea guerrerensis Dwyer & Lorence"
1722
                + " in Allertonia 4: 428. fig 4a-c. 1988");  //
1723
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1724
        combinationAuthor = name.getCombinationAuthorship();
1725
        assertEquals( "Dwyer & Lorence", combinationAuthor.getNomenclaturalTitle());
1726
        nomRef = (Reference)name.getNomenclaturalReference();
1727
        assertEquals(ReferenceType.Article, nomRef.getType());
1728
        assertEquals("4", nomRef.getVolume());
1729
        assertEquals("428. fig 4a-c", name.getNomenclaturalMicroReference());
1730

  
1731
        //issue with EN_DASH (3–4)
1732
        name = parser.parseReferencedName("Arachnothryx tacanensis (Lundell) Borhidi"
1733
              + " in Acta Bot. Hung. 33 (3–4): 303. 1987");
1734
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1735
        combinationAuthor = name.getCombinationAuthorship();
1736
        assertEquals( "Borhidi", combinationAuthor.getNomenclaturalTitle());
1737
        nomRef = (Reference)name.getNomenclaturalReference();
1738
        assertEquals(ReferenceType.Article, nomRef.getType());
1739
        assertEquals("33 (3–4)", nomRef.getVolume());
1740
        assertEquals("303", name.getNomenclaturalMicroReference());
1741

  
1742
        //fig with f.
1743
        name = parser.parseReferencedName("Stenotis Terrell"
1744
                + " in Sida 19(4): 901–911, f. 1–2. 2001");
1745
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1746
        combinationAuthor = name.getCombinationAuthorship();
1747
        assertEquals( "Terrell", combinationAuthor.getNomenclaturalTitle());
1748
        nomRef = (Reference)name.getNomenclaturalReference();
1749
        assertEquals(ReferenceType.Article, nomRef.getType());
1750
        assertEquals("19(4)", nomRef.getVolume());
1751
        assertEquals("901–911, f. 1–2", name.getNomenclaturalMicroReference());
1752

  
1753
        //pl
1754
        name = parser.parseReferencedName("Carapichea  Aubl."
1755
                + " in Hist. Pl. Guiane 1: 167, pl. 64. 1775");
1756
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1757
        combinationAuthor = name.getCombinationAuthorship();
1758
        assertEquals( "Aubl.", combinationAuthor.getNomenclaturalTitle());
1759
        nomRef = (Reference)name.getNomenclaturalReference();
1760
        assertEquals(ReferenceType.Article, nomRef.getType());
1761
        assertEquals("1", nomRef.getVolume());
1762
        assertEquals("167, pl. 64", name.getNomenclaturalMicroReference());
1763

  
1764
        //fig with ,
1765
        name = parser.parseReferencedName("Hoffmannia ixtlanensis Lorence"
1766
                + " in Novon 4: 121. fig. 2a, b. 1994");
1767
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1768
        combinationAuthor = name.getCombinationAuthorship();
1769
        assertEquals( "Lorence", combinationAuthor.getNomenclaturalTitle());
1770
        nomRef = (Reference)name.getNomenclaturalReference();
1771
        assertEquals(ReferenceType.Article, nomRef.getType());
1772
        assertEquals("4", nomRef.getVolume());
1773
        assertEquals("121. fig. 2a, b", name.getNomenclaturalMicroReference());
1774

  
1775
        //(Suppl.)
1776
        name = parser.parseReferencedName("Manettia costaricensis  Wernham"
1777
                + " in J. Bot. 57(Suppl.): 38. 1919");
1778
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1779
        combinationAuthor = name.getCombinationAuthorship();
1780
        assertEquals( "Wernham", combinationAuthor.getNomenclaturalTitle());
1781
        nomRef = (Reference)name.getNomenclaturalReference();
1782
        assertEquals(ReferenceType.Article, nomRef.getType());
1783
        assertEquals("57(Suppl.)", nomRef.getVolume());
1784
        assertEquals("38", name.getNomenclaturalMicroReference());
1785

  
1786
        //NY.
1787
        name = parser.parseReferencedName("Crusea psyllioides (Kunth) W.R. Anderson"
1788
                + " in Mem. NY. Bot. Gard. 22: 75. 1972");
1789
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1790
        combinationAuthor = name.getCombinationAuthorship();
1791
        assertEquals( "W.R. Anderson", combinationAuthor.getNomenclaturalTitle());
1792
        nomRef = (Reference)name.getNomenclaturalReference();
1793
        assertEquals(ReferenceType.Article, nomRef.getType());
1794
        assertEquals("22", nomRef.getVolume());
1795
        assertEquals("75", name.getNomenclaturalMicroReference());
1796

  
1797
        //apostrophe word in title
1798
        name = parser.parseReferencedName("Sabicea glabrescens Benth."
1799
                + " in Hooker's J. Bot. Kew Gard. Misc. 3: 219. 1841");
1800
        Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1801
        combinationAuthor = name.getCombinationAuthorship();
1802
        assertEquals( "Benth.", combinationAuthor.getNomenclaturalTitle());
1803
        nomRef = (Reference)name.getNomenclaturalReference();
1804
        assertEquals(ReferenceType.Article, nomRef.getType());
1805
        assertEquals("3", nomRef.getVolume());
1806
        assertEquals("219", name.getNomenclaturalMicroReference());
1807

  
1808
        //
1809
//      //(Hannover) place published
1810
//      name = parser.parseReferencedName("Pittoniotis trichantha Griseb."
1811
//              + " in Bonplandia (Hannover) 6 (1): 8. 1858");
1812
//      Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache());
1813
//      combinationAuthor = name.getCombinationAuthorship();
1814
//      assertEquals( "Griseb.", combinationAuthor.getNomenclaturalTitle());
1815
//      nomRef = (Reference)name.getNomenclaturalReference();
1816
//      assertEquals(ReferenceType.Article, nomRef.getType());
1817
//      assertEquals("6 (1)", nomRef.getVolume());
1818
//      assertEquals("8", name.getNomenclaturalMicroReference());
1819
 }
1820

  
1697 1821
}
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParserTest.java
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

  
10
package eu.etaxonomy.cdm.strategy.parser;
11

  
12
import static org.junit.Assert.assertFalse;
13
import static org.junit.Assert.assertNotNull;
14
import static org.junit.Assert.assertNull;
15
import static org.junit.Assert.assertTrue;
16
import org.junit.Assert;
17

  
18
import org.apache.log4j.Logger;
19
import org.joda.time.DateTimeFieldType;
20
import org.joda.time.Partial;
21
import org.junit.After;
22
import org.junit.AfterClass;
23
import org.junit.Before;
24
import org.junit.BeforeClass;
25
import org.junit.Test;
26

  
27
import eu.etaxonomy.cdm.model.common.TimePeriod;
28
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
29

  
30
/**
31
 * @author a.mueller
32
 *
33
 */
34
public class TimePeriodParserTest {
35
	private static final Logger logger = Logger.getLogger(TimePeriodParserTest.class);
36
	
37
	private TimePeriod onlyStartYear;
38
	private TimePeriod onlyEndYear;
39
	private TimePeriod startAndEndYear;
40
	private TimePeriod noStartAndEndYear;
41
	
42
	
43
	/**
44
	 * @throws java.lang.Exception
45
	 */
46
	@BeforeClass
47
	public static void setUpBeforeClass() throws Exception {
48
	}
49

  
50
	/**
51
	 * @throws java.lang.Exception
52
	 */
53
	@AfterClass
54
	public static void tearDownAfterClass() throws Exception {
55
	}
56

  
57
	/**
58
	 * @throws java.lang.Exception
59
	 */
60
	@Before
61
	public void setUp() throws Exception {
62
		onlyStartYear = TimePeriod.NewInstance(1922);
63
		onlyEndYear = TimePeriod.NewInstance(null, 1857);;
64
		startAndEndYear = TimePeriod.NewInstance(1931, 1957);
65
		Integer start = null;
66
		Integer end = null;
67
		noStartAndEndYear = TimePeriod.NewInstance(start, end);;
68
	}
69

  
70
	/**
71
	 * @throws java.lang.Exception
72
	 */
73
	@After
74
	public void tearDown() throws Exception {
75
	}
76

  
77
	
78
//************************ TESTS ******************************************		
79
	
80

  
81
	@Test
82
	public void testParseSingleDateString() {
83
		String strDate = "1756";
84
		Partial date = TimePeriodParser.parseSingleDate(strDate);
85
		assertNotNull(date);
86
		Assert.assertEquals(Integer.parseInt(strDate), date.get(DateTimeFieldType.year()));
87
		try {
88
			date.get(DateTimeFieldType.monthOfYear());
89
			assertFalse(true); //should not be reached
90
		} catch (Exception e) {
91
			assertTrue(e instanceof IllegalArgumentException);
92
		}
93
		try {
94
			date.get(DateTimeFieldType.dayOfMonth());
95
			assertFalse(true); //should not be reached
96
		} catch (Exception e) {
97
			assertTrue(e instanceof IllegalArgumentException);
98
		}
99
		//to be continued
100
	}
101
	
102
	
103
	/**
104
	 * Test method for {@link eu.etaxonomy.cdm.strategy.parser.TimePeriodParser#parseString(java.lang.String)}.
105
	 */
106
	@Test
107
	public void testParseStringString() {
108
		String strTimePeriod = "1756";
109
		TimePeriod tp1 = TimePeriodParser.parseString(strTimePeriod);
110
		assertNotNull(tp1);
111
		Assert.assertEquals(strTimePeriod, tp1.getYear());
112
		Assert.assertEquals(strTimePeriod, String.valueOf(tp1.getStartYear()));
113
		assertNull(tp1.getEnd());
114
		assertNull(tp1.getStartMonth());
115
		strTimePeriod = "1756-88";
116
		tp1 = TimePeriodParser.parseString(strTimePeriod);
117
		assertNotNull(tp1);
118
		Assert.assertEquals("1756-1788", tp1.getYear());
119
		Assert.assertEquals("1756", String.valueOf(tp1.getStartYear()));
120
		Assert.assertEquals("1788", String.valueOf(tp1.getEndYear()));
121
		assertNull(tp1.getEndMonth());
122
		assertNull(tp1.getStartMonth());
123
		//unparsable
124
		String strUnparsablePeriod = "wef 1809-78";
125
		TimePeriod tpUnparsable = TimePeriodParser.parseString(strUnparsablePeriod);
126
		assertNotNull(tpUnparsable);
127
		Assert.assertEquals(strUnparsablePeriod, tpUnparsable.getFreeText());
128
		
129
		//"1806"[1807]
130
		String strCorrectedPeriod = "\"1806\"[1807]";
131
		TimePeriod tpcorrected = TimePeriodParser.parseString(strCorrectedPeriod);
132
		assertNotNull(tpcorrected);
133
		Assert.assertEquals(strCorrectedPeriod, tpcorrected.getFreeText());
134
		Assert.assertEquals("1807", tpcorrected.getYear());
135
		
136
		
137
		//fl. 1806
138
		String strFlPeriod = "fl.  1806?";
139
		TimePeriod tpFl = TimePeriodParser.parseString(strFlPeriod);
140
		assertNotNull(tpFl);
141
		Assert.assertEquals(strFlPeriod, tpFl.getFreeText());
142
		Assert.assertEquals("1806", tpFl.getYear());
143
		
144
		String strCPeriod = "c.  1806-1810";
145
		TimePeriod tpC = TimePeriodParser.parseString(strCPeriod);
146
		assertNotNull(tpC);
147
		Assert.assertEquals(strCPeriod, tpC.getFreeText());
148
		Assert.assertEquals(Integer.valueOf(1806), tpC.getStartYear());
149
		Assert.assertEquals(Integer.valueOf(1810), tpC.getEndYear());
150
		Assert.assertEquals("1806-1810", tpC.getYear());
151
		
152
		//1.2.2011
153
		String strDotDate = "1.2.2011";
154
		TimePeriod tp = TimePeriodParser.parseString(strDotDate);
155
		assertNotNull(tp);
156
		Assert.assertEquals(strDotDate, tp.toString());
157
		Assert.assertEquals("2011", tp.getYear());
158
		Assert.assertEquals(Integer.valueOf(2), tp.getStartMonth());
159
		Assert.assertEquals(Integer.valueOf(1), tp.getStartDay());
160
		
161
		strDotDate = "31.03.2012";
162
		tp = TimePeriodParser.parseString(strDotDate);
163
		assertNotNull(tp);
164
		Assert.assertEquals("31.3.2012", tp.toString());
165
		Assert.assertEquals("2012", tp.getYear());
166
		Assert.assertEquals(Integer.valueOf(3), tp.getStartMonth());
167
		Assert.assertEquals(Integer.valueOf(31), tp.getStartDay());
168
		
169
		strDotDate = "00.04.2013";
170
		tp = TimePeriodParser.parseString(strDotDate);
171
		assertNotNull(tp);
172
		Assert.assertEquals("4.2013", tp.toString());
173
		Assert.assertEquals("2013", tp.getYear());
174
		Assert.assertEquals(Integer.valueOf(4), tp.getStartMonth());
175
		Assert.assertEquals(null, tp.getStartDay());
176
		
177
		strDotDate = "13.00.2014";
178
		tp = TimePeriodParser.parseString(strDotDate);
179
		assertNotNull(tp);
180
		Assert.assertEquals("13.xx.2014", tp.toString());
181
		Assert.assertEquals("2014", tp.getYear());
182
		Assert.assertEquals(null, tp.getStartMonth());
183
		Assert.assertEquals(Integer.valueOf(13), tp.getStartDay());
184
		
185
		strDotDate = "31.12.2015 - 02.01.2016";
186
		tp = TimePeriodParser.parseString(strDotDate);
187
		assertNotNull(tp);
188
		Assert.assertEquals("31.12.2015-2.1.2016", tp.toString());
189
		Assert.assertEquals("2015-2016", tp.getYear());
190
		Assert.assertEquals(Integer.valueOf(2015), tp.getStartYear());
191
		Assert.assertEquals(Integer.valueOf(12), tp.getStartMonth());
192
		Assert.assertEquals(Integer.valueOf(31), tp.getStartDay());
193
		Assert.assertEquals(Integer.valueOf(2016), tp.getEndYear());
194
		Assert.assertEquals(Integer.valueOf(1), tp.getEndMonth());
195
		Assert.assertEquals(Integer.valueOf(2), tp.getEndDay());		
196
	}
197
	
198
	
199
}
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.strategy.parser;
11

  
12
import static org.junit.Assert.assertFalse;
13
import static org.junit.Assert.assertNotNull;
14
import static org.junit.Assert.assertNull;
15
import static org.junit.Assert.assertTrue;
16

  
17
import org.apache.log4j.Logger;
18
import org.joda.time.DateTimeFieldType;
19
import org.joda.time.Partial;
20
import org.junit.After;
21
import org.junit.AfterClass;
22
import org.junit.Assert;
23
import org.junit.Before;
24
import org.junit.BeforeClass;
25
import org.junit.Test;
26

  
27
import eu.etaxonomy.cdm.common.UTF8;
28
import eu.etaxonomy.cdm.model.common.TimePeriod;
29

  
30
/**
31
 * @author a.mueller
32
 *
33
 */
34
public class TimePeriodParserTest {
35
	private static final Logger logger = Logger.getLogger(TimePeriodParserTest.class);
36

  
37
	private TimePeriod onlyStartYear;
38
	private TimePeriod onlyEndYear;
39
	private TimePeriod startAndEndYear;
40
	private TimePeriod noStartAndEndYear;
41

  
42

  
43
	/**
44
	 * @throws java.lang.Exception
45
	 */
46
	@BeforeClass
47
	public static void setUpBeforeClass() throws Exception {
48
	}
49

  
50
	/**
51
	 * @throws java.lang.Exception
52
	 */
53
	@AfterClass
54
	public static void tearDownAfterClass() throws Exception {
55
	}
56

  
57
	/**
58
	 * @throws java.lang.Exception
59
	 */
60
	@Before
61
	public void setUp() throws Exception {
62
		onlyStartYear = TimePeriod.NewInstance(1922);
63
		onlyEndYear = TimePeriod.NewInstance(null, 1857);
64
		startAndEndYear = TimePeriod.NewInstance(1931, 1957);
65
		Integer start = null;
66
		Integer end = null;
67
		noStartAndEndYear = TimePeriod.NewInstance(start, end);
68
	}
69

  
70
	/**
71
	 * @throws java.lang.Exception
72
	 */
73
	@After
74
	public void tearDown() throws Exception {
75
	}
76

  
77

  
78
//************************ TESTS ******************************************
79

  
80

  
81
	@Test
82
	public void testParseSingleDateString() {
83
		String strDate = "1756";
84
		Partial date = TimePeriodParser.parseSingleDate(strDate);
85
		assertNotNull(date);
86
		Assert.assertEquals(Integer.parseInt(strDate), date.get(DateTimeFieldType.year()));
87
		try {
88
			date.get(DateTimeFieldType.monthOfYear());
89
			assertFalse(true); //should not be reached
90
		} catch (Exception e) {
91
			assertTrue(e instanceof IllegalArgumentException);
92
		}
93
		try {
94
			date.get(DateTimeFieldType.dayOfMonth());
95
			assertFalse(true); //should not be reached
96
		} catch (Exception e) {
97
			assertTrue(e instanceof IllegalArgumentException);
98
		}
99
		//to be continued
100
	}
101

  
102

  
103
	/**
104
	 * Test method for {@link eu.etaxonomy.cdm.strategy.parser.TimePeriodParser#parseString(java.lang.String)}.
105
	 */
106
	@Test
107
	public void testParseStringString() {
108
		String strTimePeriod = "1756";
109
		TimePeriod tp1 = TimePeriodParser.parseString(strTimePeriod);
110
		assertNotNull(tp1);
111
		Assert.assertEquals(strTimePeriod, tp1.getYear());
112
		Assert.assertEquals(strTimePeriod, String.valueOf(tp1.getStartYear()));
113
		assertNull(tp1.getEnd());
114
		assertNull(tp1.getStartMonth());
115
		strTimePeriod = "1756-88";
116
		tp1 = TimePeriodParser.parseString(strTimePeriod);
117
		assertNotNull(tp1);
118
		Assert.assertEquals("1756-1788", tp1.getYear());
119
		Assert.assertEquals("1756", String.valueOf(tp1.getStartYear()));
120
		Assert.assertEquals("1788", String.valueOf(tp1.getEndYear()));
121
		assertNull(tp1.getEndMonth());
122
		assertNull(tp1.getStartMonth());
123
		//unparsable
124
		String strUnparsablePeriod = "wef 1809-78";
125
		TimePeriod tpUnparsable = TimePeriodParser.parseString(strUnparsablePeriod);
126
		assertNotNull(tpUnparsable);
127
		Assert.assertEquals(strUnparsablePeriod, tpUnparsable.getFreeText());
128

  
129
		//"1806"[1807]
130
		String strCorrectedPeriod = "\"1806\"[1807]";
131
		TimePeriod tpcorrected = TimePeriodParser.parseString(strCorrectedPeriod);
132
		assertNotNull(tpcorrected);
133
		Assert.assertEquals(strCorrectedPeriod, tpcorrected.getFreeText());
134
		Assert.assertEquals("1807", tpcorrected.getYear());
135

  
136
	      //„1806‟[1807]
137
        String strCorrectedEnPeriod = UTF8.ENGLISH_QUOT_START + "1806"+UTF8.ENGLISH_QUOT_END+"[1807]";
138
        TimePeriod tpcorrectedEn = TimePeriodParser.parseString(strCorrectedEnPeriod);
139
        assertNotNull(tpcorrectedEn);
140
        Assert.assertEquals(strCorrectedEnPeriod, tpcorrectedEn.getFreeText());
141
        Assert.assertEquals("1807", tpcorrectedEn.getYear());
142

  
143

  
144
		//fl. 1806
145
		String strFlPeriod = "fl.  1806?";
146
		TimePeriod tpFl = TimePeriodParser.parseString(strFlPeriod);
147
		assertNotNull(tpFl);
148
		Assert.assertEquals(strFlPeriod, tpFl.getFreeText());
149
		Assert.assertEquals("1806", tpFl.getYear());
150

  
151
		String strCPeriod = "c.  1806-1810";
152
		TimePeriod tpC = TimePeriodParser.parseString(strCPeriod);
153
		assertNotNull(tpC);
154
		Assert.assertEquals(strCPeriod, tpC.getFreeText());
155
		Assert.assertEquals(Integer.valueOf(1806), tpC.getStartYear());
156
		Assert.assertEquals(Integer.valueOf(1810), tpC.getEndYear());
157
		Assert.assertEquals("1806-1810", tpC.getYear());
158

  
159
		//1.2.2011
160
		String strDotDate = "1.2.2011";
161
		TimePeriod tp = TimePeriodParser.parseString(strDotDate);
162
		assertNotNull(tp);
163
		Assert.assertEquals(strDotDate, tp.toString());
164
		Assert.assertEquals("2011", tp.getYear());
165
		Assert.assertEquals(Integer.valueOf(2), tp.getStartMonth());
166
		Assert.assertEquals(Integer.valueOf(1), tp.getStartDay());
167

  
168
		strDotDate = "31.03.2012";
169
		tp = TimePeriodParser.parseString(strDotDate);
170
		assertNotNull(tp);
171
		Assert.assertEquals("31.3.2012", tp.toString());
172
		Assert.assertEquals("2012", tp.getYear());
173
		Assert.assertEquals(Integer.valueOf(3), tp.getStartMonth());
174
		Assert.assertEquals(Integer.valueOf(31), tp.getStartDay());
175

  
176
		strDotDate = "00.04.2013";
177
		tp = TimePeriodParser.parseString(strDotDate);
178
		assertNotNull(tp);
179
		Assert.assertEquals("4.2013", tp.toString());
180
		Assert.assertEquals("2013", tp.getYear());
181
		Assert.assertEquals(Integer.valueOf(4), tp.getStartMonth());
182
		Assert.assertEquals(null, tp.getStartDay());
183

  
184
		strDotDate = "13.00.2014";
185
		tp = TimePeriodParser.parseString(strDotDate);
186
		assertNotNull(tp);
187
		Assert.assertEquals("13.xx.2014", tp.toString());
188
		Assert.assertEquals("2014", tp.getYear());
189
		Assert.assertEquals(null, tp.getStartMonth());
190
		Assert.assertEquals(Integer.valueOf(13), tp.getStartDay());
191

  
192
		strDotDate = "31.12.2015 - 02.01.2016";
193
		tp = TimePeriodParser.parseString(strDotDate);
194
		assertNotNull(tp);
195
		Assert.assertEquals("31.12.2015-2.1.2016", tp.toString());
196
		Assert.assertEquals("2015-2016", tp.getYear());
197
		Assert.assertEquals(Integer.valueOf(2015), tp.getStartYear());
198
		Assert.assertEquals(Integer.valueOf(12), tp.getStartMonth());
199
		Assert.assertEquals(Integer.valueOf(31), tp.getStartDay());
200
		Assert.assertEquals(Integer.valueOf(2016), tp.getEndYear());
201
		Assert.assertEquals(Integer.valueOf(1), tp.getEndMonth());
202
		Assert.assertEquals(Integer.valueOf(2), tp.getEndDay());
203
	}
204

  
205

  
206
}

Also available in: Unified diff