updated to trunk
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / strategy / parser / TimePeriodParser.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.strategy.parser;
10
11 import java.text.DateFormat;
12 import java.text.ParsePosition;
13 import java.util.Calendar;
14 import java.util.Date;
15 import java.util.regex.Matcher;
16 import java.util.regex.Pattern;
17
18 import org.apache.log4j.Logger;
19 import org.joda.time.DateTime;
20 import org.joda.time.DateTimeFieldType;
21 import org.joda.time.Partial;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.model.common.TimePeriod;
25
26 /**
 * Parser that converts date and period strings (single years, year ranges and dotted dates) into {@link TimePeriod} objects.
28 * @author a.mueller
29 * @created 14-Jul-2013
30 */
31 public class TimePeriodParser {
private static final Logger logger = Logger.getLogger(TimePeriodParser.class);

//pattern for the first 4-digit year in a string
private static final Pattern firstYearPattern = Pattern.compile("\\d{4}");
//case "1806"[1807]: a quoted 4-digit year followed by another 4-digit year in square brackets
private static final Pattern uncorrectYearPatter = Pattern.compile("\"\\d{4}\"\\s*\\[\\d{4}\\]");
//case fl. 1806 or c. 1806 or fl. 1806?, optionally with a second 4-digit year after '-'
private static final Pattern prefixedYearPattern = Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
//standard: a 2-4 digit number, optionally followed by '-' and an optional second 2-4 digit number
private static final Pattern standardPattern = Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?");
//single dotted date: 1-2 digits '.' 1-2 digits '.' 4 digits
private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}";
//a dotted date, optionally followed by '-' and a second dotted date
//NOTE(review): the '?' directly after the second %s binds to the trailing {4,4} quantifier of strDotDate,
//not to the whole end date, so a string like "1.1.1806 -" does not match - confirm whether this is intended
private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate);
private static final Pattern dotDatePattern = Pattern.compile(strDotDatePeriodPattern);
45
46
47 public static TimePeriod parseString(TimePeriod timePeriod, String periodString){
48 //TODO move to parser class
49 //TODO until now only quick and dirty (and partly wrong)
50 TimePeriod result = timePeriod;
51
52 if(timePeriod == null){
53 return timePeriod;
54 }
55
56 if (periodString == null){
57 return result;
58 }
59 periodString = periodString.trim();
60
61 result.setFreeText(null);
62 Date date;
63
64 //case "1806"[1807];
65 if (uncorrectYearPatter.matcher(periodString).matches()){
66 result.setFreeText(periodString);
67 String realYear = periodString.split("\\[")[1];
68 realYear = realYear.replace("]", "");
69 result.setStartYear(Integer.valueOf(realYear));
70 result.setFreeText(periodString);
71 //case fl. 1806 or c. 1806 or fl. 1806?
72 }else if(prefixedYearPattern.matcher(periodString).matches()){
73 result.setFreeText(periodString);
74 Matcher yearMatcher = firstYearPattern.matcher(periodString);
75 yearMatcher.find();
76 String startYear = yearMatcher.group();
77 result.setStartYear(Integer.valueOf(startYear));
78 if (yearMatcher.find()){
79 String endYear = yearMatcher.group();
80 result.setEndYear(Integer.valueOf(endYear));
81 }
82 }else if (dotDatePattern.matcher(periodString).matches()){
83 parseDotDatePattern(periodString, result);
84 }else if (standardPattern.matcher(periodString).matches()){
85 parseStandardPattern(periodString, result);
86 //TODO first check ambiguity of parser results e.g. for 7/12/11
87 // }else if (isDateString(periodString)){
88 // String[] startEnd = makeStartEnd(periodString);
89 // String start = startEnd[0];
90 // DateTime startDateTime = dateStringParse(start, true);
91 // result.setStart(startDateTime);
92 // if (startEnd.length > 1){
93 // DateTime endDateTime = dateStringParse(startEnd[1], true);
94 // ;
95 // result.setEnd(endDateTime.toLocalDate());
96 // }
97
98 }else{
99 result.setFreeText(periodString);
100 }
101 return result;
102 }
103
104 private static boolean isDateString(String periodString) {
105 String[] startEnd = makeStartEnd(periodString);
106 String start = startEnd[0];
107 DateTime startDateTime = dateStringParse(start, true);
108 if (startDateTime == null){
109 return false;
110 }
111 if (startEnd.length > 1){
112 DateTime endDateTime = dateStringParse(startEnd[1], true);
113 if (endDateTime != null){
114 return true;
115 }
116 }
117 return false;
118 }
119
120
121 /**
122 * @param periodString
123 * @return
124 */
125 private static String[] makeStartEnd(String periodString) {
126 String[] startEnd = new String[]{periodString};
127 if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){
128 startEnd = periodString.split("-");
129 }
130 return startEnd;
131 }
132
133
134 private static DateTime dateStringParse(String string, boolean strict) {
135 DateFormat dateFormat = DateFormat.getDateInstance();
136 ParsePosition pos = new ParsePosition(0);
137 Date a = dateFormat.parse(string, pos);
138 if (a == null || pos.getIndex() != string.length()){
139 return null;
140 }
141 Calendar cal = Calendar.getInstance();
142 cal.setTime(a);
143 DateTime result = new DateTime(cal);
144 return result;
145 }
146
147
148 /**
149 * @param periodString
150 * @param result
151 */
152 private static void parseDotDatePattern(String periodString,TimePeriod result) {
153 String[] dates = periodString.split("-");
154 Partial dtStart = null;
155 Partial dtEnd = null;
156
157 if (dates.length > 2 || dates.length <= 0){
158 logger.warn("More than 1 '-' in period String: " + periodString);
159 result.setFreeText(periodString);
160 }else {
161 try {
162 //start
163 if (! CdmUtils.isEmpty(dates[0])){
164 dtStart = parseSingleDotDate(dates[0].trim());
165 }
166
167 //end
168 if (dates.length >= 2 && ! CdmUtils.isEmpty(dates[1])){
169 dtEnd = parseSingleDotDate(dates[1].trim());
170 }
171
172 result.setStart(dtStart);
173 result.setEnd(dtEnd);
174 } catch (IllegalArgumentException e) {
175 //logger.warn(e.getMessage());
176 result.setFreeText(periodString);
177 }
178 }
179 }
180
181
182 /**
183 * @param periodString
184 * @param result
185 */
186 private static void parseStandardPattern(String periodString,
187 TimePeriod result) {
188 String[] years = periodString.split("-");
189 Partial dtStart = null;
190 Partial dtEnd = null;
191
192 if (years.length > 2 || years.length <= 0){
193 logger.warn("More than 1 '-' in period String: " + periodString);
194 }else {
195 try {
196 //start
197 if (! CdmUtils.isEmpty(years[0])){
198 dtStart = parseSingleDate(years[0].trim());
199 }
200
201 //end
202 if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){
203 years[1] = years[1].trim();
204 if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){
205 years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1];
206 }
207 dtEnd = parseSingleDate(years[1]);
208 }
209
210 result.setStart(dtStart);
211 result.setEnd(dtEnd);
212 } catch (IllegalArgumentException e) {
213 //logger.warn(e.getMessage());
214 result.setFreeText(periodString);
215 }
216 }
217 }
218
219 public static TimePeriod parseString(String strPeriod) {
220 TimePeriod timePeriod = TimePeriod.NewInstance();
221 return parseString(timePeriod, strPeriod);
222 }
223
224
225 protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{
226 //FIXME until now only quick and dirty and incomplete
227 Partial partial = new Partial();
228 singleDateString = singleDateString.trim();
229 if (CdmUtils.isNumeric(singleDateString)){
230 try {
231 Integer year = Integer.valueOf(singleDateString.trim());
232 if (year < 1000 && year > 2100){
233 logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
234 }else if (year < 1700 && year > 2100){
235 logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
236 partial = partial.with(TimePeriod.YEAR_TYPE, year);
237 }else{
238 partial = partial.with(TimePeriod.YEAR_TYPE, year);
239 }
240 } catch (NumberFormatException e) {
241 logger.debug("Not a Integer format in getCalendar()");
242 throw new IllegalArgumentException(e);
243 }
244 }else{
245 throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString);
246 }
247 return partial;
248
249 }
250
251 protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{
252 Partial partial = new Partial();
253 singleDateString = singleDateString.trim();
254 String[] split = singleDateString.split("\\.");
255 int length = split.length;
256 if (length > 3){
257 throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString));
258 }
259 String strYear = split[split.length-1];
260 String strMonth = length >= 2? split[split.length-2]: null;
261 String strDay = length >= 3? split[split.length-3]: null;
262
263
264 try {
265 Integer year = Integer.valueOf(strYear.trim());
266 Integer month = Integer.valueOf(strMonth.trim());
267 Integer day = Integer.valueOf(strDay.trim());
268 if (year < 1000 && year > 2100){
269 logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100");
270 }else if (year < 1700 && year > 2100){
271 logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100");
272 partial = partial.with(TimePeriod.YEAR_TYPE, year);
273 }else{
274 partial = partial.with(TimePeriod.YEAR_TYPE, year);
275 }
276 if (month != null && month != 0){
277 partial = partial.with(TimePeriod.MONTH_TYPE, month);
278 }
279 if (day != null && day != 0){
280 partial = partial.with(TimePeriod.DAY_TYPE, day);
281 }
282 } catch (NumberFormatException e) {
283 logger.debug("Not a Integer format somewhere in " + singleDateString);
284 throw new IllegalArgumentException(e);
285 }
286 return partial;
287
288 }
289
290 }