Project

General

Profile

Download (9.83 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.strategy.parser;
10

    
11
import java.text.DateFormat;
12
import java.text.ParsePosition;
13
import java.util.Calendar;
14
import java.util.Date;
15
import java.util.regex.Matcher;
16
import java.util.regex.Pattern;
17

    
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.log4j.Logger;
20
import org.joda.time.DateTime;
21
import org.joda.time.DateTimeFieldType;
22
import org.joda.time.Partial;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.model.common.TimePeriod;
26

    
27
/**
28
 * Class for parsing all types of date string to TimePeriod
29
 * @author a.mueller
30
 * @created 14-Jul-2013
31
 */
32
public class TimePeriodParser {
33
	private static final Logger logger = Logger.getLogger(TimePeriodParser.class);
34

    
35
	//patter for first year in string;
36
	private static final Pattern firstYearPattern =  Pattern.compile("\\d{4}");
37
	//case "1806"[1807];
38
	private static final Pattern uncorrectYearPatter = Pattern.compile(NonViralNameParserImplRegExBase.incorrectYearPhrase);
39
//OLD	        Pattern.compile("[\""+UTF8.ENGLISH_QUOT_START+"]\\d{4}[\""+UTF8.ENGLISH_QUOT_END+"]\\s*\\[\\d{4}\\]");
40

    
41
	//case fl. 1806 or c. 1806 or fl. 1806?
42
	private static final Pattern prefixedYearPattern =  Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
43
	//standard
44
	private static final Pattern standardPattern =  Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?");
45
	private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}";
46
	private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate);
47
	private static final Pattern dotDatePattern =  Pattern.compile(strDotDatePeriodPattern);
48
	private static final Pattern lifeSpanPattern =  Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern));
49

    
50

    
51
	public static TimePeriod parseString(TimePeriod timePeriod, String periodString){
52
		//TODO move to parser class
53
		//TODO until now only quick and dirty (and partly wrong)
54
		TimePeriod result = timePeriod;
55

    
56
		if(timePeriod == null){
57
			return timePeriod;
58
		}
59

    
60
		if (periodString == null){
61
			return result;
62
		}
63
		periodString = periodString.trim();
64

    
65
		result.setFreeText(null);
66

    
67
		//case "1806"[1807];
68
		if (uncorrectYearPatter.matcher(periodString).matches()){
69
			result.setFreeText(periodString);
70
			String realYear = periodString.split("\\[")[1];
71
			realYear = realYear.replace("]", "");
72
			result.setStartYear(Integer.valueOf(realYear));
73
			result.setFreeText(periodString);
74
		//case fl. 1806 or c. 1806 or fl. 1806?
75
		}else if(prefixedYearPattern.matcher(periodString).matches()){
76
			result.setFreeText(periodString);
77
			Matcher yearMatcher = firstYearPattern.matcher(periodString);
78
			yearMatcher.find();
79
			String startYear = yearMatcher.group();
80
			result.setStartYear(Integer.valueOf(startYear));
81
			if (yearMatcher.find()){
82
				String endYear = yearMatcher.group();
83
				result.setEndYear(Integer.valueOf(endYear));
84
			}
85
		}else if (dotDatePattern.matcher(periodString).matches()){
86
			parseDotDatePattern(periodString, result);
87
		}else if (lifeSpanPattern.matcher(periodString).matches()){
88
			parseLifeSpanPattern(periodString, result);
89
		}else if (standardPattern.matcher(periodString).matches()){
90
			parseStandardPattern(periodString, result);
91
//TODO first check ambiguity of parser results e.g. for 7/12/11
92
//			}else if (isDateString(periodString)){
93
//				String[] startEnd = makeStartEnd(periodString);
94
//				String start = startEnd[0];
95
//				DateTime startDateTime = dateStringParse(start, true);
96
//				result.setStart(startDateTime);
97
//				if (startEnd.length > 1){
98
//					DateTime endDateTime = dateStringParse(startEnd[1], true);
99
//					;
100
//					result.setEnd(endDateTime.toLocalDate());
101
//				}
102

    
103
		}else{
104
			result.setFreeText(periodString);
105
		}
106
		return result;
107
	}
108

    
109
	private static boolean isDateString(String periodString) {
110
		String[] startEnd = makeStartEnd(periodString);
111
		String start = startEnd[0];
112
		DateTime startDateTime = dateStringParse(start, true);
113
		if (startDateTime == null){
114
			return false;
115
		}
116
		if (startEnd.length > 1){
117
			DateTime endDateTime = dateStringParse(startEnd[1], true);
118
			if (endDateTime != null){
119
				return true;
120
			}
121
		}
122
		return false;
123
	}
124

    
125

    
126
	/**
127
	 * @param periodString
128
	 * @return
129
	 */
130
	private static String[] makeStartEnd(String periodString) {
131
		String[] startEnd = new String[]{periodString};
132
		if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){
133
			startEnd = periodString.split("-");
134
		}
135
		return startEnd;
136
	}
137

    
138

    
139
	private static DateTime dateStringParse(String string, boolean strict) {
140
		DateFormat dateFormat = DateFormat.getDateInstance();
141
		ParsePosition pos = new ParsePosition(0);
142
		Date a = dateFormat.parse(string, pos);
143
		if (a == null || pos.getIndex() != string.length()){
144
			return null;
145
		}
146
		Calendar cal = Calendar.getInstance();
147
		cal.setTime(a);
148
		DateTime result = new DateTime(cal);
149
		return result;
150
	}
151

    
152

    
153
	/**
154
	 * @param periodString
155
	 * @param result
156
	 */
157
	private static void parseDotDatePattern(String periodString,TimePeriod result) {
158
		String[] dates = periodString.split("-");
159
		Partial dtStart = null;
160
		Partial dtEnd = null;
161

    
162
		if (dates.length > 2 || dates.length <= 0){
163
			logger.warn("More than 1 '-' in period String: " + periodString);
164
			result.setFreeText(periodString);
165
		}else {
166
			try {
167
				//start
168
				if (! StringUtils.isBlank(dates[0])){
169
					dtStart = parseSingleDotDate(dates[0].trim());
170
				}
171

    
172
				//end
173
				if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){
174
					dtEnd = parseSingleDotDate(dates[1].trim());
175
				}
176

    
177
				result.setStart(dtStart);
178
				result.setEnd(dtEnd);
179
			} catch (IllegalArgumentException e) {
180
				//logger.warn(e.getMessage());
181
				result.setFreeText(periodString);
182
			}
183
		}
184
	}
185

    
186
	private static void parseLifeSpanPattern(String periodString, TimePeriod result) {
187

    
188
		try{
189
			String[] years = periodString.split("--");
190
			String start = years[0];
191
			String end = years[1];
192

    
193
			result.setStartYear(Integer.valueOf(start));
194
			result.setEndYear(Integer.valueOf(end));
195
		} catch (Exception e) {
196
			//logger.warn(e.getMessage());
197
			result.setFreeText(periodString);
198
		}
199
	}
200

    
201

    
202
	/**
203
	 * @param periodString
204
	 * @param result
205
	 */
206
	private static void parseStandardPattern(String periodString,
207
			TimePeriod result) {
208
		String[] years = periodString.split("-");
209
		Partial dtStart = null;
210
		Partial dtEnd = null;
211

    
212
		if (years.length > 2 || years.length <= 0){
213
			logger.warn("More than 1 '-' in period String: " + periodString);
214
		}else {
215
			try {
216
				//start
217
				if (! CdmUtils.isEmpty(years[0])){
218
					dtStart = parseSingleDate(years[0].trim());
219
				}
220

    
221
				//end
222
				if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){
223
					years[1] = years[1].trim();
224
					if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){
225
						years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1];
226
					}
227
					dtEnd = parseSingleDate(years[1]);
228
				}
229

    
230
				result.setStart(dtStart);
231
				result.setEnd(dtEnd);
232
			} catch (IllegalArgumentException e) {
233
				//logger.warn(e.getMessage());
234
				result.setFreeText(periodString);
235
			}
236
		}
237
	}
238

    
239
	public static TimePeriod parseString(String strPeriod) {
240
		TimePeriod timePeriod = TimePeriod.NewInstance();
241
		return parseString(timePeriod, strPeriod);
242
	}
243

    
244

    
245
	protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{
246
		//FIXME until now only quick and dirty and incomplete
247
		Partial partial =  new Partial();
248
		singleDateString = singleDateString.trim();
249
		if (CdmUtils.isNumeric(singleDateString)){
250
			try {
251
				Integer year = Integer.valueOf(singleDateString.trim());
252
				if (year < 1000 && year > 2100){
253
					logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
254
				}else if (year < 1700 && year > 2100){
255
					logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
256
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
257
				}else{
258
					partial = partial.with(TimePeriod.YEAR_TYPE, year);
259
				}
260
			} catch (NumberFormatException e) {
261
				logger.debug("Not a Integer format in getCalendar()");
262
				throw new IllegalArgumentException(e);
263
			}
264
		}else{
265
			throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString);
266
		}
267
		return partial;
268

    
269
	}
270

    
271
	protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{
272
		Partial partial =  new Partial();
273
		singleDateString = singleDateString.trim();
274
		String[] split = singleDateString.split("\\.");
275
		int length = split.length;
276
		if (length > 3){
277
			throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString));
278
		}
279
		String strYear = split[split.length-1];
280
		String strMonth = length >= 2? split[split.length-2]: null;
281
		String strDay = length >= 3? split[split.length-3]: null;
282

    
283

    
284
		try {
285
			Integer year = Integer.valueOf(strYear.trim());
286
			Integer month = Integer.valueOf(strMonth.trim());
287
			Integer day = Integer.valueOf(strDay.trim());
288
			if (year < 1000 && year > 2100){
289
				logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
290
			}else if (year < 1700 && year > 2100){
291
				logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
292
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
293
			}else{
294
				partial = partial.with(TimePeriod.YEAR_TYPE, year);
295
			}
296
			if (month != null && month != 0){
297
				partial = partial.with(TimePeriod.MONTH_TYPE, month);
298
			}
299
			if (day != null && day != 0){
300
				partial = partial.with(TimePeriod.DAY_TYPE, day);
301
			}
302
		} catch (NumberFormatException e) {
303
			logger.debug("Not a Integer format somewhere in " + singleDateString);
304
			throw new IllegalArgumentException(e);
305
		}
306
		return partial;
307

    
308
	}
309

    
310
}
(8-8/8)