Revision 4f5219a9
Added by Andreas Müller almost 8 years ago
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParser.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.strategy.parser; |
|
10 |
|
|
11 |
import java.text.DateFormat; |
|
12 |
import java.text.ParsePosition; |
|
13 |
import java.util.Calendar; |
|
14 |
import java.util.Date; |
|
15 |
import java.util.regex.Matcher; |
|
16 |
import java.util.regex.Pattern; |
|
17 |
|
|
18 |
import org.apache.commons.lang.StringUtils; |
|
19 |
import org.apache.log4j.Logger; |
|
20 |
import org.joda.time.DateTime; |
|
21 |
import org.joda.time.DateTimeFieldType; |
|
22 |
import org.joda.time.Partial; |
|
23 |
|
|
24 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
25 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
26 |
|
|
27 |
/** |
|
28 |
* Class for parsing all types of date string to TimePeriod |
|
29 |
* @author a.mueller |
|
30 |
* @created 14-Jul-2013 |
|
31 |
*/ |
|
32 |
public class TimePeriodParser { |
|
33 |
private static final Logger logger = Logger.getLogger(TimePeriodParser.class); |
|
34 |
|
|
35 |
//patter for first year in string; |
|
36 |
private static final Pattern firstYearPattern = Pattern.compile("\\d{4}"); |
|
37 |
//case "1806"[1807]; |
|
38 |
private static final Pattern uncorrectYearPatter = Pattern.compile("\"\\d{4}\"\\s*\\[\\d{4}\\]"); |
|
39 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
40 |
private static final Pattern prefixedYearPattern = Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??"); |
|
41 |
//standard |
|
42 |
private static final Pattern standardPattern = Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?"); |
|
43 |
private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}"; |
|
44 |
private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate); |
|
45 |
private static final Pattern dotDatePattern = Pattern.compile(strDotDatePeriodPattern); |
|
46 |
private static final Pattern lifeSpanPattern = Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern)); |
|
47 |
|
|
48 |
|
|
49 |
public static TimePeriod parseString(TimePeriod timePeriod, String periodString){ |
|
50 |
//TODO move to parser class |
|
51 |
//TODO until now only quick and dirty (and partly wrong) |
|
52 |
TimePeriod result = timePeriod; |
|
53 |
|
|
54 |
if(timePeriod == null){ |
|
55 |
return timePeriod; |
|
56 |
} |
|
57 |
|
|
58 |
if (periodString == null){ |
|
59 |
return result; |
|
60 |
} |
|
61 |
periodString = periodString.trim(); |
|
62 |
|
|
63 |
result.setFreeText(null); |
|
64 |
Date date; |
|
65 |
|
|
66 |
//case "1806"[1807]; |
|
67 |
if (uncorrectYearPatter.matcher(periodString).matches()){ |
|
68 |
result.setFreeText(periodString); |
|
69 |
String realYear = periodString.split("\\[")[1]; |
|
70 |
realYear = realYear.replace("]", ""); |
|
71 |
result.setStartYear(Integer.valueOf(realYear)); |
|
72 |
result.setFreeText(periodString); |
|
73 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
74 |
}else if(prefixedYearPattern.matcher(periodString).matches()){ |
|
75 |
result.setFreeText(periodString); |
|
76 |
Matcher yearMatcher = firstYearPattern.matcher(periodString); |
|
77 |
yearMatcher.find(); |
|
78 |
String startYear = yearMatcher.group(); |
|
79 |
result.setStartYear(Integer.valueOf(startYear)); |
|
80 |
if (yearMatcher.find()){ |
|
81 |
String endYear = yearMatcher.group(); |
|
82 |
result.setEndYear(Integer.valueOf(endYear)); |
|
83 |
} |
|
84 |
}else if (dotDatePattern.matcher(periodString).matches()){ |
|
85 |
parseDotDatePattern(periodString, result); |
|
86 |
}else if (lifeSpanPattern.matcher(periodString).matches()){ |
|
87 |
parseLifeSpanPattern(periodString, result); |
|
88 |
}else if (standardPattern.matcher(periodString).matches()){ |
|
89 |
parseStandardPattern(periodString, result); |
|
90 |
//TODO first check ambiguity of parser results e.g. for 7/12/11 |
|
91 |
// }else if (isDateString(periodString)){ |
|
92 |
// String[] startEnd = makeStartEnd(periodString); |
|
93 |
// String start = startEnd[0]; |
|
94 |
// DateTime startDateTime = dateStringParse(start, true); |
|
95 |
// result.setStart(startDateTime); |
|
96 |
// if (startEnd.length > 1){ |
|
97 |
// DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
98 |
// ; |
|
99 |
// result.setEnd(endDateTime.toLocalDate()); |
|
100 |
// } |
|
101 |
|
|
102 |
}else{ |
|
103 |
result.setFreeText(periodString); |
|
104 |
} |
|
105 |
return result; |
|
106 |
} |
|
107 |
|
|
108 |
private static boolean isDateString(String periodString) { |
|
109 |
String[] startEnd = makeStartEnd(periodString); |
|
110 |
String start = startEnd[0]; |
|
111 |
DateTime startDateTime = dateStringParse(start, true); |
|
112 |
if (startDateTime == null){ |
|
113 |
return false; |
|
114 |
} |
|
115 |
if (startEnd.length > 1){ |
|
116 |
DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
117 |
if (endDateTime != null){ |
|
118 |
return true; |
|
119 |
} |
|
120 |
} |
|
121 |
return false; |
|
122 |
} |
|
123 |
|
|
124 |
|
|
125 |
/** |
|
126 |
* @param periodString |
|
127 |
* @return |
|
128 |
*/ |
|
129 |
private static String[] makeStartEnd(String periodString) { |
|
130 |
String[] startEnd = new String[]{periodString}; |
|
131 |
if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){ |
|
132 |
startEnd = periodString.split("-"); |
|
133 |
} |
|
134 |
return startEnd; |
|
135 |
} |
|
136 |
|
|
137 |
|
|
138 |
private static DateTime dateStringParse(String string, boolean strict) { |
|
139 |
DateFormat dateFormat = DateFormat.getDateInstance(); |
|
140 |
ParsePosition pos = new ParsePosition(0); |
|
141 |
Date a = dateFormat.parse(string, pos); |
|
142 |
if (a == null || pos.getIndex() != string.length()){ |
|
143 |
return null; |
|
144 |
} |
|
145 |
Calendar cal = Calendar.getInstance(); |
|
146 |
cal.setTime(a); |
|
147 |
DateTime result = new DateTime(cal); |
|
148 |
return result; |
|
149 |
} |
|
150 |
|
|
151 |
|
|
152 |
/** |
|
153 |
* @param periodString |
|
154 |
* @param result |
|
155 |
*/ |
|
156 |
private static void parseDotDatePattern(String periodString,TimePeriod result) { |
|
157 |
String[] dates = periodString.split("-"); |
|
158 |
Partial dtStart = null; |
|
159 |
Partial dtEnd = null; |
|
160 |
|
|
161 |
if (dates.length > 2 || dates.length <= 0){ |
|
162 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
163 |
result.setFreeText(periodString); |
|
164 |
}else { |
|
165 |
try { |
|
166 |
//start |
|
167 |
if (! StringUtils.isBlank(dates[0])){ |
|
168 |
dtStart = parseSingleDotDate(dates[0].trim()); |
|
169 |
} |
|
170 |
|
|
171 |
//end |
|
172 |
if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){ |
|
173 |
dtEnd = parseSingleDotDate(dates[1].trim()); |
|
174 |
} |
|
175 |
|
|
176 |
result.setStart(dtStart); |
|
177 |
result.setEnd(dtEnd); |
|
178 |
} catch (IllegalArgumentException e) { |
|
179 |
//logger.warn(e.getMessage()); |
|
180 |
result.setFreeText(periodString); |
|
181 |
} |
|
182 |
} |
|
183 |
} |
|
184 |
|
|
185 |
private static void parseLifeSpanPattern(String periodString, TimePeriod result) { |
|
186 |
|
|
187 |
try{ |
|
188 |
String[] years = periodString.split("--"); |
|
189 |
String start = years[0]; |
|
190 |
String end = years[1]; |
|
191 |
|
|
192 |
result.setStartYear(Integer.valueOf(start)); |
|
193 |
result.setEndYear(Integer.valueOf(end)); |
|
194 |
} catch (Exception e) { |
|
195 |
//logger.warn(e.getMessage()); |
|
196 |
result.setFreeText(periodString); |
|
197 |
} |
|
198 |
} |
|
199 |
|
|
200 |
|
|
201 |
/** |
|
202 |
* @param periodString |
|
203 |
* @param result |
|
204 |
*/ |
|
205 |
private static void parseStandardPattern(String periodString, |
|
206 |
TimePeriod result) { |
|
207 |
String[] years = periodString.split("-"); |
|
208 |
Partial dtStart = null; |
|
209 |
Partial dtEnd = null; |
|
210 |
|
|
211 |
if (years.length > 2 || years.length <= 0){ |
|
212 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
213 |
}else { |
|
214 |
try { |
|
215 |
//start |
|
216 |
if (! CdmUtils.isEmpty(years[0])){ |
|
217 |
dtStart = parseSingleDate(years[0].trim()); |
|
218 |
} |
|
219 |
|
|
220 |
//end |
|
221 |
if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){ |
|
222 |
years[1] = years[1].trim(); |
|
223 |
if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){ |
|
224 |
years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1]; |
|
225 |
} |
|
226 |
dtEnd = parseSingleDate(years[1]); |
|
227 |
} |
|
228 |
|
|
229 |
result.setStart(dtStart); |
|
230 |
result.setEnd(dtEnd); |
|
231 |
} catch (IllegalArgumentException e) { |
|
232 |
//logger.warn(e.getMessage()); |
|
233 |
result.setFreeText(periodString); |
|
234 |
} |
|
235 |
} |
|
236 |
} |
|
237 |
|
|
238 |
public static TimePeriod parseString(String strPeriod) { |
|
239 |
TimePeriod timePeriod = TimePeriod.NewInstance(); |
|
240 |
return parseString(timePeriod, strPeriod); |
|
241 |
} |
|
242 |
|
|
243 |
|
|
244 |
protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{ |
|
245 |
//FIXME until now only quick and dirty and incomplete |
|
246 |
Partial partial = new Partial(); |
|
247 |
singleDateString = singleDateString.trim(); |
|
248 |
if (CdmUtils.isNumeric(singleDateString)){ |
|
249 |
try { |
|
250 |
Integer year = Integer.valueOf(singleDateString.trim()); |
|
251 |
if (year < 1000 && year > 2100){ |
|
252 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
253 |
}else if (year < 1700 && year > 2100){ |
|
254 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
255 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
256 |
}else{ |
|
257 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
258 |
} |
|
259 |
} catch (NumberFormatException e) { |
|
260 |
logger.debug("Not a Integer format in getCalendar()"); |
|
261 |
throw new IllegalArgumentException(e); |
|
262 |
} |
|
263 |
}else{ |
|
264 |
throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString); |
|
265 |
} |
|
266 |
return partial; |
|
267 |
|
|
268 |
} |
|
269 |
|
|
270 |
protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{ |
|
271 |
Partial partial = new Partial(); |
|
272 |
singleDateString = singleDateString.trim(); |
|
273 |
String[] split = singleDateString.split("\\."); |
|
274 |
int length = split.length; |
|
275 |
if (length > 3){ |
|
276 |
throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString)); |
|
277 |
} |
|
278 |
String strYear = split[split.length-1]; |
|
279 |
String strMonth = length >= 2? split[split.length-2]: null; |
|
280 |
String strDay = length >= 3? split[split.length-3]: null; |
|
281 |
|
|
282 |
|
|
283 |
try { |
|
284 |
Integer year = Integer.valueOf(strYear.trim()); |
|
285 |
Integer month = Integer.valueOf(strMonth.trim()); |
|
286 |
Integer day = Integer.valueOf(strDay.trim()); |
|
287 |
if (year < 1000 && year > 2100){ |
|
288 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
289 |
}else if (year < 1700 && year > 2100){ |
|
290 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
291 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
292 |
}else{ |
|
293 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
294 |
} |
|
295 |
if (month != null && month != 0){ |
|
296 |
partial = partial.with(TimePeriod.MONTH_TYPE, month); |
|
297 |
} |
|
298 |
if (day != null && day != 0){ |
|
299 |
partial = partial.with(TimePeriod.DAY_TYPE, day); |
|
300 |
} |
|
301 |
} catch (NumberFormatException e) { |
|
302 |
logger.debug("Not a Integer format somewhere in " + singleDateString); |
|
303 |
throw new IllegalArgumentException(e); |
|
304 |
} |
|
305 |
return partial; |
|
306 |
|
|
307 |
} |
|
308 |
|
|
309 |
} |
|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.strategy.parser; |
|
10 |
|
|
11 |
import java.text.DateFormat; |
|
12 |
import java.text.ParsePosition; |
|
13 |
import java.util.Calendar; |
|
14 |
import java.util.Date; |
|
15 |
import java.util.regex.Matcher; |
|
16 |
import java.util.regex.Pattern; |
|
17 |
|
|
18 |
import org.apache.commons.lang.StringUtils; |
|
19 |
import org.apache.log4j.Logger; |
|
20 |
import org.joda.time.DateTime; |
|
21 |
import org.joda.time.DateTimeFieldType; |
|
22 |
import org.joda.time.Partial; |
|
23 |
|
|
24 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
25 |
import eu.etaxonomy.cdm.common.UTF8; |
|
26 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
27 |
|
|
28 |
/** |
|
29 |
* Class for parsing all types of date string to TimePeriod |
|
30 |
* @author a.mueller |
|
31 |
* @created 14-Jul-2013 |
|
32 |
*/ |
|
33 |
public class TimePeriodParser { |
|
34 |
private static final Logger logger = Logger.getLogger(TimePeriodParser.class); |
|
35 |
|
|
36 |
//patter for first year in string; |
|
37 |
private static final Pattern firstYearPattern = Pattern.compile("\\d{4}"); |
|
38 |
//case "1806"[1807]; |
|
39 |
private static final Pattern uncorrectYearPatter = Pattern.compile("[\""+UTF8.ENGLISH_QUOT_START+"]\\d{4}[\""+UTF8.ENGLISH_QUOT_END+"]\\s*\\[\\d{4}\\]"); |
|
40 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
41 |
private static final Pattern prefixedYearPattern = Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??"); |
|
42 |
//standard |
|
43 |
private static final Pattern standardPattern = Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?"); |
|
44 |
private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}"; |
|
45 |
private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate); |
|
46 |
private static final Pattern dotDatePattern = Pattern.compile(strDotDatePeriodPattern); |
|
47 |
private static final Pattern lifeSpanPattern = Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern)); |
|
48 |
|
|
49 |
|
|
50 |
public static TimePeriod parseString(TimePeriod timePeriod, String periodString){ |
|
51 |
//TODO move to parser class |
|
52 |
//TODO until now only quick and dirty (and partly wrong) |
|
53 |
TimePeriod result = timePeriod; |
|
54 |
|
|
55 |
if(timePeriod == null){ |
|
56 |
return timePeriod; |
|
57 |
} |
|
58 |
|
|
59 |
if (periodString == null){ |
|
60 |
return result; |
|
61 |
} |
|
62 |
periodString = periodString.trim(); |
|
63 |
|
|
64 |
result.setFreeText(null); |
|
65 |
Date date; |
|
66 |
|
|
67 |
//case "1806"[1807]; |
|
68 |
if (uncorrectYearPatter.matcher(periodString).matches()){ |
|
69 |
result.setFreeText(periodString); |
|
70 |
String realYear = periodString.split("\\[")[1]; |
|
71 |
realYear = realYear.replace("]", ""); |
|
72 |
result.setStartYear(Integer.valueOf(realYear)); |
|
73 |
result.setFreeText(periodString); |
|
74 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
75 |
}else if(prefixedYearPattern.matcher(periodString).matches()){ |
|
76 |
result.setFreeText(periodString); |
|
77 |
Matcher yearMatcher = firstYearPattern.matcher(periodString); |
|
78 |
yearMatcher.find(); |
|
79 |
String startYear = yearMatcher.group(); |
|
80 |
result.setStartYear(Integer.valueOf(startYear)); |
|
81 |
if (yearMatcher.find()){ |
|
82 |
String endYear = yearMatcher.group(); |
|
83 |
result.setEndYear(Integer.valueOf(endYear)); |
|
84 |
} |
|
85 |
}else if (dotDatePattern.matcher(periodString).matches()){ |
|
86 |
parseDotDatePattern(periodString, result); |
|
87 |
}else if (lifeSpanPattern.matcher(periodString).matches()){ |
|
88 |
parseLifeSpanPattern(periodString, result); |
|
89 |
}else if (standardPattern.matcher(periodString).matches()){ |
|
90 |
parseStandardPattern(periodString, result); |
|
91 |
//TODO first check ambiguity of parser results e.g. for 7/12/11 |
|
92 |
// }else if (isDateString(periodString)){ |
|
93 |
// String[] startEnd = makeStartEnd(periodString); |
|
94 |
// String start = startEnd[0]; |
|
95 |
// DateTime startDateTime = dateStringParse(start, true); |
|
96 |
// result.setStart(startDateTime); |
|
97 |
// if (startEnd.length > 1){ |
|
98 |
// DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
99 |
// ; |
|
100 |
// result.setEnd(endDateTime.toLocalDate()); |
|
101 |
// } |
|
102 |
|
|
103 |
}else{ |
|
104 |
result.setFreeText(periodString); |
|
105 |
} |
|
106 |
return result; |
|
107 |
} |
|
108 |
|
|
109 |
private static boolean isDateString(String periodString) { |
|
110 |
String[] startEnd = makeStartEnd(periodString); |
|
111 |
String start = startEnd[0]; |
|
112 |
DateTime startDateTime = dateStringParse(start, true); |
|
113 |
if (startDateTime == null){ |
|
114 |
return false; |
|
115 |
} |
|
116 |
if (startEnd.length > 1){ |
|
117 |
DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
118 |
if (endDateTime != null){ |
|
119 |
return true; |
|
120 |
} |
|
121 |
} |
|
122 |
return false; |
|
123 |
} |
|
124 |
|
|
125 |
|
|
126 |
/** |
|
127 |
* @param periodString |
|
128 |
* @return |
|
129 |
*/ |
|
130 |
private static String[] makeStartEnd(String periodString) { |
|
131 |
String[] startEnd = new String[]{periodString}; |
|
132 |
if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){ |
|
133 |
startEnd = periodString.split("-"); |
|
134 |
} |
|
135 |
return startEnd; |
|
136 |
} |
|
137 |
|
|
138 |
|
|
139 |
private static DateTime dateStringParse(String string, boolean strict) { |
|
140 |
DateFormat dateFormat = DateFormat.getDateInstance(); |
|
141 |
ParsePosition pos = new ParsePosition(0); |
|
142 |
Date a = dateFormat.parse(string, pos); |
|
143 |
if (a == null || pos.getIndex() != string.length()){ |
|
144 |
return null; |
|
145 |
} |
|
146 |
Calendar cal = Calendar.getInstance(); |
|
147 |
cal.setTime(a); |
|
148 |
DateTime result = new DateTime(cal); |
|
149 |
return result; |
|
150 |
} |
|
151 |
|
|
152 |
|
|
153 |
/** |
|
154 |
* @param periodString |
|
155 |
* @param result |
|
156 |
*/ |
|
157 |
private static void parseDotDatePattern(String periodString,TimePeriod result) { |
|
158 |
String[] dates = periodString.split("-"); |
|
159 |
Partial dtStart = null; |
|
160 |
Partial dtEnd = null; |
|
161 |
|
|
162 |
if (dates.length > 2 || dates.length <= 0){ |
|
163 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
164 |
result.setFreeText(periodString); |
|
165 |
}else { |
|
166 |
try { |
|
167 |
//start |
|
168 |
if (! StringUtils.isBlank(dates[0])){ |
|
169 |
dtStart = parseSingleDotDate(dates[0].trim()); |
|
170 |
} |
|
171 |
|
|
172 |
//end |
|
173 |
if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){ |
|
174 |
dtEnd = parseSingleDotDate(dates[1].trim()); |
|
175 |
} |
|
176 |
|
|
177 |
result.setStart(dtStart); |
|
178 |
result.setEnd(dtEnd); |
|
179 |
} catch (IllegalArgumentException e) { |
|
180 |
//logger.warn(e.getMessage()); |
|
181 |
result.setFreeText(periodString); |
|
182 |
} |
|
183 |
} |
|
184 |
} |
|
185 |
|
|
186 |
private static void parseLifeSpanPattern(String periodString, TimePeriod result) { |
|
187 |
|
|
188 |
try{ |
|
189 |
String[] years = periodString.split("--"); |
|
190 |
String start = years[0]; |
|
191 |
String end = years[1]; |
|
192 |
|
|
193 |
result.setStartYear(Integer.valueOf(start)); |
|
194 |
result.setEndYear(Integer.valueOf(end)); |
|
195 |
} catch (Exception e) { |
|
196 |
//logger.warn(e.getMessage()); |
|
197 |
result.setFreeText(periodString); |
|
198 |
} |
|
199 |
} |
|
200 |
|
|
201 |
|
|
202 |
/** |
|
203 |
* @param periodString |
|
204 |
* @param result |
|
205 |
*/ |
|
206 |
private static void parseStandardPattern(String periodString, |
|
207 |
TimePeriod result) { |
|
208 |
String[] years = periodString.split("-"); |
|
209 |
Partial dtStart = null; |
|
210 |
Partial dtEnd = null; |
|
211 |
|
|
212 |
if (years.length > 2 || years.length <= 0){ |
|
213 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
214 |
}else { |
|
215 |
try { |
|
216 |
//start |
|
217 |
if (! CdmUtils.isEmpty(years[0])){ |
|
218 |
dtStart = parseSingleDate(years[0].trim()); |
|
219 |
} |
|
220 |
|
|
221 |
//end |
|
222 |
if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){ |
|
223 |
years[1] = years[1].trim(); |
|
224 |
if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){ |
|
225 |
years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1]; |
|
226 |
} |
|
227 |
dtEnd = parseSingleDate(years[1]); |
|
228 |
} |
|
229 |
|
|
230 |
result.setStart(dtStart); |
|
231 |
result.setEnd(dtEnd); |
|
232 |
} catch (IllegalArgumentException e) { |
|
233 |
//logger.warn(e.getMessage()); |
|
234 |
result.setFreeText(periodString); |
|
235 |
} |
|
236 |
} |
|
237 |
} |
|
238 |
|
|
239 |
public static TimePeriod parseString(String strPeriod) { |
|
240 |
TimePeriod timePeriod = TimePeriod.NewInstance(); |
|
241 |
return parseString(timePeriod, strPeriod); |
|
242 |
} |
|
243 |
|
|
244 |
|
|
245 |
protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{ |
|
246 |
//FIXME until now only quick and dirty and incomplete |
|
247 |
Partial partial = new Partial(); |
|
248 |
singleDateString = singleDateString.trim(); |
|
249 |
if (CdmUtils.isNumeric(singleDateString)){ |
|
250 |
try { |
|
251 |
Integer year = Integer.valueOf(singleDateString.trim()); |
|
252 |
if (year < 1000 && year > 2100){ |
|
253 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
254 |
}else if (year < 1700 && year > 2100){ |
|
255 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
256 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
257 |
}else{ |
|
258 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
259 |
} |
|
260 |
} catch (NumberFormatException e) { |
|
261 |
logger.debug("Not a Integer format in getCalendar()"); |
|
262 |
throw new IllegalArgumentException(e); |
|
263 |
} |
|
264 |
}else{ |
|
265 |
throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString); |
|
266 |
} |
|
267 |
return partial; |
|
268 |
|
|
269 |
} |
|
270 |
|
|
271 |
protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{ |
|
272 |
Partial partial = new Partial(); |
|
273 |
singleDateString = singleDateString.trim(); |
|
274 |
String[] split = singleDateString.split("\\."); |
|
275 |
int length = split.length; |
|
276 |
if (length > 3){ |
|
277 |
throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString)); |
|
278 |
} |
|
279 |
String strYear = split[split.length-1]; |
|
280 |
String strMonth = length >= 2? split[split.length-2]: null; |
|
281 |
String strDay = length >= 3? split[split.length-3]: null; |
|
282 |
|
|
283 |
|
|
284 |
try { |
|
285 |
Integer year = Integer.valueOf(strYear.trim()); |
|
286 |
Integer month = Integer.valueOf(strMonth.trim()); |
|
287 |
Integer day = Integer.valueOf(strDay.trim()); |
|
288 |
if (year < 1000 && year > 2100){ |
|
289 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
290 |
}else if (year < 1700 && year > 2100){ |
|
291 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
292 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
293 |
}else{ |
|
294 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
295 |
} |
|
296 |
if (month != null && month != 0){ |
|
297 |
partial = partial.with(TimePeriod.MONTH_TYPE, month); |
|
298 |
} |
|
299 |
if (day != null && day != 0){ |
|
300 |
partial = partial.with(TimePeriod.DAY_TYPE, day); |
|
301 |
} |
|
302 |
} catch (NumberFormatException e) { |
|
303 |
logger.debug("Not a Integer format somewhere in " + singleDateString); |
|
304 |
throw new IllegalArgumentException(e); |
|
305 |
} |
|
306 |
return partial; |
|
307 |
|
|
308 |
} |
|
309 |
|
|
310 |
} |
Also available in: Unified diff
#5909 Improve referenced name parser