/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9 package eu
.etaxonomy
.cdm
.strategy
.parser
;
11 import java
.text
.DateFormat
;
12 import java
.text
.ParsePosition
;
13 import java
.util
.Calendar
;
14 import java
.util
.Date
;
15 import java
.util
.regex
.Matcher
;
16 import java
.util
.regex
.Pattern
;
18 import org
.apache
.log4j
.Logger
;
19 import org
.joda
.time
.DateTime
;
20 import org
.joda
.time
.DateTimeFieldType
;
21 import org
.joda
.time
.Partial
;
23 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
24 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
/**
 * Class for parsing all types of date strings to a TimePeriod.
 *
 * @created 14-Jul-2013
 */
31 public class TimePeriodParser
{
// Logger shared by the static parse methods of this class; it is obtained from
// log4j for TimePeriodParser.class.
32 private static final Logger logger
= Logger
.getLogger(TimePeriodParser
.class);
34 //patter for first year in string;
35 private static final Pattern firstYearPattern
= Pattern
.compile("\\d{4}");
37 private static final Pattern uncorrectYearPatter
= Pattern
.compile("\"\\d{4}\"\\s*\\[\\d{4}\\]");
38 //case fl. 1806 or c. 1806 or fl. 1806?
39 private static final Pattern prefixedYearPattern
= Pattern
.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
41 private static final Pattern standardPattern
= Pattern
.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?");
42 private static final String strDotDate
= "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}";
43 private static final String strDotDatePeriodPattern
= String
.format("%s(\\s*-\\s*%s?)?", strDotDate
, strDotDate
);
44 private static final Pattern dotDatePattern
= Pattern
.compile(strDotDatePeriodPattern
);
47 public static TimePeriod
parseString(TimePeriod timePeriod
, String periodString
){
48 //TODO move to parser class
49 //TODO until now only quick and dirty (and partly wrong)
50 TimePeriod result
= timePeriod
;
52 if(timePeriod
== null){
56 if (periodString
== null){
59 periodString
= periodString
.trim();
61 result
.setFreeText(null);
65 if (uncorrectYearPatter
.matcher(periodString
).matches()){
66 result
.setFreeText(periodString
);
67 String realYear
= periodString
.split("\\[")[1];
68 realYear
= realYear
.replace("]", "");
69 result
.setStartYear(Integer
.valueOf(realYear
));
70 result
.setFreeText(periodString
);
71 //case fl. 1806 or c. 1806 or fl. 1806?
72 }else if(prefixedYearPattern
.matcher(periodString
).matches()){
73 result
.setFreeText(periodString
);
74 Matcher yearMatcher
= firstYearPattern
.matcher(periodString
);
76 String startYear
= yearMatcher
.group();
77 result
.setStartYear(Integer
.valueOf(startYear
));
78 if (yearMatcher
.find()){
79 String endYear
= yearMatcher
.group();
80 result
.setEndYear(Integer
.valueOf(endYear
));
82 }else if (dotDatePattern
.matcher(periodString
).matches()){
83 parseDotDatePattern(periodString
, result
);
84 }else if (standardPattern
.matcher(periodString
).matches()){
85 parseStandardPattern(periodString
, result
);
86 //TODO first check ambiguity of parser results e.g. for 7/12/11
87 // }else if (isDateString(periodString)){
88 // String[] startEnd = makeStartEnd(periodString);
89 // String start = startEnd[0];
90 // DateTime startDateTime = dateStringParse(start, true);
91 // result.setStart(startDateTime);
92 // if (startEnd.length > 1){
93 // DateTime endDateTime = dateStringParse(startEnd[1], true);
95 // result.setEnd(endDateTime.toLocalDate());
99 result
.setFreeText(periodString
);
// Checks whether periodString can be read as a date or a date range: the string
// is split by makeStartEnd and each part is handed to dateStringParse.
// NOTE(review): the statements inside the branches below (presumably the return
// statements) are not visible in this excerpt - confirm the result of each branch.
104 private static boolean isDateString(String periodString
) {
105 String
[] startEnd
= makeStartEnd(periodString
);
106 String start
= startEnd
[0];
107 DateTime startDateTime
= dateStringParse(start
, true);
// the first part could not be parsed as a date
108 if (startDateTime
== null){
// a second part exists, so it is parsed as well
111 if (startEnd
.length
> 1){
112 DateTime endDateTime
= dateStringParse(startEnd
[1], true);
// the second part could be parsed as a date, too
113 if (endDateTime
!= null){
122 * @param periodString
125 private static String
[] makeStartEnd(String periodString
) {
126 String
[] startEnd
= new String
[]{periodString
};
127 if (periodString
.contains("-") && periodString
.matches("^-{2,}-^-{2,}")){
128 startEnd
= periodString
.split("-");
134 private static DateTime
dateStringParse(String string
, boolean strict
) {
135 DateFormat dateFormat
= DateFormat
.getDateInstance();
136 ParsePosition pos
= new ParsePosition(0);
137 Date a
= dateFormat
.parse(string
, pos
);
138 if (a
== null || pos
.getIndex() != string
.length()){
141 Calendar cal
= Calendar
.getInstance();
143 DateTime result
= new DateTime(cal
);
149 * @param periodString
152 private static void parseDotDatePattern(String periodString
,TimePeriod result
) {
153 String
[] dates
= periodString
.split("-");
154 Partial dtStart
= null;
155 Partial dtEnd
= null;
157 if (dates
.length
> 2 || dates
.length
<= 0){
158 logger
.warn("More than 1 '-' in period String: " + periodString
);
159 result
.setFreeText(periodString
);
163 if (! CdmUtils
.isEmpty(dates
[0])){
164 dtStart
= parseSingleDotDate(dates
[0].trim());
168 if (dates
.length
>= 2 && ! CdmUtils
.isEmpty(dates
[1])){
169 dtEnd
= parseSingleDotDate(dates
[1].trim());
172 result
.setStart(dtStart
);
173 result
.setEnd(dtEnd
);
174 } catch (IllegalArgumentException e
) {
175 //logger.warn(e.getMessage());
176 result
.setFreeText(periodString
);
183 * @param periodString
186 private static void parseStandardPattern(String periodString
,
188 String
[] years
= periodString
.split("-");
189 Partial dtStart
= null;
190 Partial dtEnd
= null;
192 if (years
.length
> 2 || years
.length
<= 0){
193 logger
.warn("More than 1 '-' in period String: " + periodString
);
197 if (! CdmUtils
.isEmpty(years
[0])){
198 dtStart
= parseSingleDate(years
[0].trim());
202 if (years
.length
>= 2 && ! CdmUtils
.isEmpty(years
[1])){
203 years
[1] = years
[1].trim();
204 if (years
[1].length()==2 && dtStart
!= null && dtStart
.isSupported(DateTimeFieldType
.year())){
205 years
[1] = String
.valueOf(dtStart
.get(DateTimeFieldType
.year())/100) + years
[1];
207 dtEnd
= parseSingleDate(years
[1]);
210 result
.setStart(dtStart
);
211 result
.setEnd(dtEnd
);
212 } catch (IllegalArgumentException e
) {
213 //logger.warn(e.getMessage());
214 result
.setFreeText(periodString
);
219 public static TimePeriod
parseString(String strPeriod
) {
220 TimePeriod timePeriod
= TimePeriod
.NewInstance();
221 return parseString(timePeriod
, strPeriod
);
225 protected static Partial
parseSingleDate(String singleDateString
) throws IllegalArgumentException
{
226 //FIXME until now only quick and dirty and incomplete
227 Partial partial
= new Partial();
228 singleDateString
= singleDateString
.trim();
229 if (CdmUtils
.isNumeric(singleDateString
)){
231 Integer year
= Integer
.valueOf(singleDateString
.trim());
232 if (year
< 1000 && year
> 2100){
233 logger
.warn("Not a valid year: " + year
+ ". Year must be between 1000 and 2100");
234 }else if (year
< 1700 && year
> 2100){
235 logger
.warn("Not a valid taxonomic year: " + year
+ ". Year must be between 1750 and 2100");
236 partial
= partial
.with(TimePeriod
.YEAR_TYPE
, year
);
238 partial
= partial
.with(TimePeriod
.YEAR_TYPE
, year
);
240 } catch (NumberFormatException e
) {
241 logger
.debug("Not a Integer format in getCalendar()");
242 throw new IllegalArgumentException(e
);
245 throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString
);
251 protected static Partial
parseSingleDotDate(String singleDateString
) throws IllegalArgumentException
{
252 Partial partial
= new Partial();
253 singleDateString
= singleDateString
.trim();
254 String
[] split
= singleDateString
.split("\\.");
255 int length
= split
.length
;
257 throw new IllegalArgumentException(String
.format("More than 2 dots in date '%s'", singleDateString
));
259 String strYear
= split
[split
.length
-1];
260 String strMonth
= length
>= 2? split
[split
.length
-2]: null;
261 String strDay
= length
>= 3? split
[split
.length
-3]: null;
265 Integer year
= Integer
.valueOf(strYear
.trim());
266 Integer month
= Integer
.valueOf(strMonth
.trim());
267 Integer day
= Integer
.valueOf(strDay
.trim());
268 if (year
< 1000 && year
> 2100){
269 logger
.warn("Not a valid year: " + year
+ ". Year must be between 1000 and 2100");
270 }else if (year
< 1700 && year
> 2100){
271 logger
.warn("Not a valid taxonomic year: " + year
+ ". Year must be between 1750 and 2100");
272 partial
= partial
.with(TimePeriod
.YEAR_TYPE
, year
);
274 partial
= partial
.with(TimePeriod
.YEAR_TYPE
, year
);
276 if (month
!= null && month
!= 0){
277 partial
= partial
.with(TimePeriod
.MONTH_TYPE
, month
);
279 if (day
!= null && day
!= 0){
280 partial
= partial
.with(TimePeriod
.DAY_TYPE
, day
);
282 } catch (NumberFormatException e
) {
283 logger
.debug("Not a Integer format somewhere in " + singleDateString
);
284 throw new IllegalArgumentException(e
);