Revision 4f5219a9
Added by Andreas Müller almost 7 years ago
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImpl.java | ||
---|---|---|
535 | 535 |
if (bookMatcher.matches() ){ |
536 | 536 |
result = parseBook(strReference); |
537 | 537 |
}else{ |
538 |
logger.warn("Non-InRef must be book but does not match book");
|
|
538 |
logger.warn("Non-InRef must be book but does not match book: "+ strReference);
|
|
539 | 539 |
result = ReferenceFactory.newBook(); |
540 | 540 |
makeUnparsableRefTitle(result, strReference); |
541 | 541 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java | ||
---|---|---|
45 | 45 |
protected static String nonCapitalWord = "\\p{javaLowerCase}+"; |
46 | 46 |
protected static String word = "(" + capitalWord + "|" + nonCapitalWord + ")"; //word (capital or non-capital) with no '.' at the end |
47 | 47 |
protected static String uppercaseWord = "\\p{javaUpperCase}{2,}"; |
48 |
protected static String apostrophWord = word + "('\\p{javaLowerCase}*)?"; |
|
48 | 49 |
|
49 | 50 |
protected static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultativ '.' at the end |
50 | 51 |
protected static String capital2charDotWord = "(" + capital2LetterWord + "\\.?|\\p{javaUpperCase}\\.)"; //capitalWord with optional '.' at the end but a minimum of 2 characters (a single capital letter like 'L' without '.' is not allowed) |
52 |
protected static String twoCapitalDotWord = "\\p{javaUpperCase}{2}\\."; //e.g. NY. |
|
53 |
|
|
51 | 54 |
protected static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultativ '.' at the end |
52 | 55 |
protected static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end |
53 | 56 |
protected static String obligateDotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.+"; //word (capital or non-capital) with obligate '.' at the end |
... | ... | |
127 | 130 |
protected static String pBracketNr = "\\[" + nr4 + "\\]"; |
128 | 131 |
protected static String pFolBracket = "\\[fol\\." + fWs + "\\d{1,2}(-\\d{1,2})?\\]"; |
129 | 132 |
|
130 |
protected static String pStrTab = "tab\\." + fWs + nr4 + "(" + fWs + "(B|\u00DF|\\(\\d{1,3}\\)))?"; |
|
131 |
protected static String pFig = "fig\\." + fWs + nr4 + "[a-z]?"; |
|
132 |
protected static String pFigs = pFig + "(-" + nr4 + ")?"; |
|
133 |
protected static String pStrTab = "[tT]((ab)?\\.|ab\\s)" + fWs + nr4 + "(" + fWs + "(B|\u00DF|\\(\\d{1,3}\\)))?"; |
|
134 |
protected static String pFig = "[fF]((ig)?\\.|ig\\s)" + fWs + nr4 + "([a-zA-Z]([-\u2013,]\\s*[a-zA-Z])?)?"; |
|
135 |
protected static String pFigs = pFig + "([-\u2013]" + nr4 + ")?"; |
|
136 |
protected static String pPlate = "[pP]((l)?\\.|l\\s)" + fWs + nr4 + "([a-zA-Z]([-\u2013,]\\s*[a-zA-Z])?)?"; |
|
137 |
|
|
138 |
|
|
133 | 139 |
//static String pTabFig = pStrTab + "(," + fWs + pFigs + ")?"; |
134 |
protected static String pTabFig = "(" + pStrTab + "|" + pFigs + ")";
|
|
140 |
protected static String pTabFigPl = "(" + pStrTab + "|" + pFigs + "|" + pPlate + ")";
|
|
135 | 141 |
|
136 | 142 |
//e.g.: p455; p.455; pp455-456; pp.455-456; pp.455,456; 455, 456; pages 456-457; pages 456,567 |
137 |
protected static String pSinglePages = "(p\\.?)?" + fWs + pPage + "(," + pTabFig +")?"; |
|
138 |
protected static String pMultiPages = "(pp\\.?|pages)?" + fWs + pPage + fWs + "(-|,)" +fWs + pPage ;
|
|
143 |
protected static String pSinglePages = "(p\\.?)?" + fWs + pPage + "(," + pTabFigPl +")?";
|
|
144 |
protected static String pMultiPages = "(pp\\.?|pages)?" + fWs + pPage + fWs + "[-\u2013,]" +fWs + pPage ;
|
|
139 | 145 |
//static String pPages = pPage + "(," + fWs + "(" + pPage + "|" + pTabFig + ")" + ")?"; |
140 | 146 |
protected static String pPages = "(" + pSinglePages +"|" + pMultiPages +")"; |
147 |
protected static String pPagesTabFig = pPages +"[,\\.]" + fWs + pTabFigPl; |
|
148 |
|
|
141 | 149 |
|
142 | 150 |
|
143 | 151 |
protected static String pCouv = "couv\\." + fWs + "\\d{1,3}"; |
... | ... | |
164 | 172 |
protected static String pRomNr = "ljfweffaflas"; //TODO rom number have to be tested first |
165 | 173 |
|
166 | 174 |
protected static String pDetailAlternatives = "(" + pPages + "|" + pPageSpecial + "|" + pStrNo + "|" + pBracketNr + |
167 |
"|" + pTabFig + "|" + pTabSpecial + "|" + pFolBracket + "|" + pCouv + "|" + pRomNr + "|" + |
|
168 |
pSpecialGardDict + "|" + pSpecialDetail + ")"; |
|
175 |
"|" + pTabFigPl + "|" + pTabSpecial + "|" + pFolBracket + "|" + pCouv + "|" + pRomNr + "|" +
|
|
176 |
pSpecialGardDict + "|" + pSpecialDetail + "|" + pPagesTabFig + ")";
|
|
169 | 177 |
|
170 | 178 |
protected static String detail = pDetailAlternatives; |
171 | 179 |
|
172 | 180 |
//reference |
173 |
protected static String volume = nr4 + "[a-z]?" + "(\\("+ nr4 + "(-"+nr4+")?\\))?";
|
|
181 |
protected static String volume = nr4 + "[a-z]?" + fWs + "(\\("+ nr4 + "([-\u2013]" + nr4 + ")?\\))?" + "(\\((Suppl|Beibl)\\.\\))?";
|
|
174 | 182 |
//this line caused problem https://dev.e-taxonomy.eu/trac/ticket/1556 in its original form: "([\u005E:\\.]" + fWs + ")"; |
175 | 183 |
protected static String anySepChar = "([\u005E:a-zA-Z]" + fWs + ")"; //all characters except for the detail separator, a stricter version would be [,\\-\\&] and some other characters |
176 | 184 |
// protected static String anySepChar = "([,\\-\\&\\.\\+\\']" + fWs + ")"; |
... | ... | |
178 | 186 |
protected static int authorSeparatorMaxPosition = 4; //Author may have a maximum of 4 words |
179 | 187 |
protected static String pTitleWordSeparator = "(\\."+ fWs+"|" + oWs + ")"; |
180 | 188 |
protected static String pSeriesPart = ",?" + fWs + "[sS]er(\\.)?" + oWs + "\\d{1,2},?"; |
181 |
protected static String referenceTitleFirstPart = "(" + word + pTitleWordSeparator + ")"; |
|
189 |
|
|
190 |
protected static String referenceTitleFirstPart = "(" + apostrophWord + pTitleWordSeparator + "|" + twoCapitalDotWord + fWs + ")"; |
|
182 | 191 |
protected static String referenceTitle = referenceTitleFirstPart + "*" + "("+ dotWord + "|" + uppercaseWord + "|" + pSeriesPart + ")"; //reference title may have words separated by whitespace or dot. The last word may not have a whitespace at the end. There must be at least one word |
183 | 192 |
protected static String referenceTitleWithSepCharacters = "(((" + referenceTitle +"|\\(.+\\))" + anySepChar + ")*" + referenceTitle + ")"; //,? |
184 | 193 |
//TODO test performance ?? |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParser.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.strategy.parser; |
|
10 |
|
|
11 |
import java.text.DateFormat; |
|
12 |
import java.text.ParsePosition; |
|
13 |
import java.util.Calendar; |
|
14 |
import java.util.Date; |
|
15 |
import java.util.regex.Matcher; |
|
16 |
import java.util.regex.Pattern; |
|
17 |
|
|
18 |
import org.apache.commons.lang.StringUtils; |
|
19 |
import org.apache.log4j.Logger; |
|
20 |
import org.joda.time.DateTime; |
|
21 |
import org.joda.time.DateTimeFieldType; |
|
22 |
import org.joda.time.Partial; |
|
23 |
|
|
24 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
25 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
26 |
|
|
27 |
/** |
|
28 |
* Class for parsing all types of date string to TimePeriod |
|
29 |
* @author a.mueller |
|
30 |
* @created 14-Jul-2013 |
|
31 |
*/ |
|
32 |
public class TimePeriodParser { |
|
33 |
private static final Logger logger = Logger.getLogger(TimePeriodParser.class); |
|
34 |
|
|
35 |
//patter for first year in string; |
|
36 |
private static final Pattern firstYearPattern = Pattern.compile("\\d{4}"); |
|
37 |
//case "1806"[1807]; |
|
38 |
private static final Pattern uncorrectYearPatter = Pattern.compile("\"\\d{4}\"\\s*\\[\\d{4}\\]"); |
|
39 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
40 |
private static final Pattern prefixedYearPattern = Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??"); |
|
41 |
//standard |
|
42 |
private static final Pattern standardPattern = Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?"); |
|
43 |
private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}"; |
|
44 |
private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate); |
|
45 |
private static final Pattern dotDatePattern = Pattern.compile(strDotDatePeriodPattern); |
|
46 |
private static final Pattern lifeSpanPattern = Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern)); |
|
47 |
|
|
48 |
|
|
49 |
public static TimePeriod parseString(TimePeriod timePeriod, String periodString){ |
|
50 |
//TODO move to parser class |
|
51 |
//TODO until now only quick and dirty (and partly wrong) |
|
52 |
TimePeriod result = timePeriod; |
|
53 |
|
|
54 |
if(timePeriod == null){ |
|
55 |
return timePeriod; |
|
56 |
} |
|
57 |
|
|
58 |
if (periodString == null){ |
|
59 |
return result; |
|
60 |
} |
|
61 |
periodString = periodString.trim(); |
|
62 |
|
|
63 |
result.setFreeText(null); |
|
64 |
Date date; |
|
65 |
|
|
66 |
//case "1806"[1807]; |
|
67 |
if (uncorrectYearPatter.matcher(periodString).matches()){ |
|
68 |
result.setFreeText(periodString); |
|
69 |
String realYear = periodString.split("\\[")[1]; |
|
70 |
realYear = realYear.replace("]", ""); |
|
71 |
result.setStartYear(Integer.valueOf(realYear)); |
|
72 |
result.setFreeText(periodString); |
|
73 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
74 |
}else if(prefixedYearPattern.matcher(periodString).matches()){ |
|
75 |
result.setFreeText(periodString); |
|
76 |
Matcher yearMatcher = firstYearPattern.matcher(periodString); |
|
77 |
yearMatcher.find(); |
|
78 |
String startYear = yearMatcher.group(); |
|
79 |
result.setStartYear(Integer.valueOf(startYear)); |
|
80 |
if (yearMatcher.find()){ |
|
81 |
String endYear = yearMatcher.group(); |
|
82 |
result.setEndYear(Integer.valueOf(endYear)); |
|
83 |
} |
|
84 |
}else if (dotDatePattern.matcher(periodString).matches()){ |
|
85 |
parseDotDatePattern(periodString, result); |
|
86 |
}else if (lifeSpanPattern.matcher(periodString).matches()){ |
|
87 |
parseLifeSpanPattern(periodString, result); |
|
88 |
}else if (standardPattern.matcher(periodString).matches()){ |
|
89 |
parseStandardPattern(periodString, result); |
|
90 |
//TODO first check ambiguity of parser results e.g. for 7/12/11 |
|
91 |
// }else if (isDateString(periodString)){ |
|
92 |
// String[] startEnd = makeStartEnd(periodString); |
|
93 |
// String start = startEnd[0]; |
|
94 |
// DateTime startDateTime = dateStringParse(start, true); |
|
95 |
// result.setStart(startDateTime); |
|
96 |
// if (startEnd.length > 1){ |
|
97 |
// DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
98 |
// ; |
|
99 |
// result.setEnd(endDateTime.toLocalDate()); |
|
100 |
// } |
|
101 |
|
|
102 |
}else{ |
|
103 |
result.setFreeText(periodString); |
|
104 |
} |
|
105 |
return result; |
|
106 |
} |
|
107 |
|
|
108 |
private static boolean isDateString(String periodString) { |
|
109 |
String[] startEnd = makeStartEnd(periodString); |
|
110 |
String start = startEnd[0]; |
|
111 |
DateTime startDateTime = dateStringParse(start, true); |
|
112 |
if (startDateTime == null){ |
|
113 |
return false; |
|
114 |
} |
|
115 |
if (startEnd.length > 1){ |
|
116 |
DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
117 |
if (endDateTime != null){ |
|
118 |
return true; |
|
119 |
} |
|
120 |
} |
|
121 |
return false; |
|
122 |
} |
|
123 |
|
|
124 |
|
|
125 |
/** |
|
126 |
* @param periodString |
|
127 |
* @return |
|
128 |
*/ |
|
129 |
private static String[] makeStartEnd(String periodString) { |
|
130 |
String[] startEnd = new String[]{periodString}; |
|
131 |
if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){ |
|
132 |
startEnd = periodString.split("-"); |
|
133 |
} |
|
134 |
return startEnd; |
|
135 |
} |
|
136 |
|
|
137 |
|
|
138 |
private static DateTime dateStringParse(String string, boolean strict) { |
|
139 |
DateFormat dateFormat = DateFormat.getDateInstance(); |
|
140 |
ParsePosition pos = new ParsePosition(0); |
|
141 |
Date a = dateFormat.parse(string, pos); |
|
142 |
if (a == null || pos.getIndex() != string.length()){ |
|
143 |
return null; |
|
144 |
} |
|
145 |
Calendar cal = Calendar.getInstance(); |
|
146 |
cal.setTime(a); |
|
147 |
DateTime result = new DateTime(cal); |
|
148 |
return result; |
|
149 |
} |
|
150 |
|
|
151 |
|
|
152 |
/** |
|
153 |
* @param periodString |
|
154 |
* @param result |
|
155 |
*/ |
|
156 |
private static void parseDotDatePattern(String periodString,TimePeriod result) { |
|
157 |
String[] dates = periodString.split("-"); |
|
158 |
Partial dtStart = null; |
|
159 |
Partial dtEnd = null; |
|
160 |
|
|
161 |
if (dates.length > 2 || dates.length <= 0){ |
|
162 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
163 |
result.setFreeText(periodString); |
|
164 |
}else { |
|
165 |
try { |
|
166 |
//start |
|
167 |
if (! StringUtils.isBlank(dates[0])){ |
|
168 |
dtStart = parseSingleDotDate(dates[0].trim()); |
|
169 |
} |
|
170 |
|
|
171 |
//end |
|
172 |
if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){ |
|
173 |
dtEnd = parseSingleDotDate(dates[1].trim()); |
|
174 |
} |
|
175 |
|
|
176 |
result.setStart(dtStart); |
|
177 |
result.setEnd(dtEnd); |
|
178 |
} catch (IllegalArgumentException e) { |
|
179 |
//logger.warn(e.getMessage()); |
|
180 |
result.setFreeText(periodString); |
|
181 |
} |
|
182 |
} |
|
183 |
} |
|
184 |
|
|
185 |
private static void parseLifeSpanPattern(String periodString, TimePeriod result) { |
|
186 |
|
|
187 |
try{ |
|
188 |
String[] years = periodString.split("--"); |
|
189 |
String start = years[0]; |
|
190 |
String end = years[1]; |
|
191 |
|
|
192 |
result.setStartYear(Integer.valueOf(start)); |
|
193 |
result.setEndYear(Integer.valueOf(end)); |
|
194 |
} catch (Exception e) { |
|
195 |
//logger.warn(e.getMessage()); |
|
196 |
result.setFreeText(periodString); |
|
197 |
} |
|
198 |
} |
|
199 |
|
|
200 |
|
|
201 |
/** |
|
202 |
* @param periodString |
|
203 |
* @param result |
|
204 |
*/ |
|
205 |
private static void parseStandardPattern(String periodString, |
|
206 |
TimePeriod result) { |
|
207 |
String[] years = periodString.split("-"); |
|
208 |
Partial dtStart = null; |
|
209 |
Partial dtEnd = null; |
|
210 |
|
|
211 |
if (years.length > 2 || years.length <= 0){ |
|
212 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
213 |
}else { |
|
214 |
try { |
|
215 |
//start |
|
216 |
if (! CdmUtils.isEmpty(years[0])){ |
|
217 |
dtStart = parseSingleDate(years[0].trim()); |
|
218 |
} |
|
219 |
|
|
220 |
//end |
|
221 |
if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){ |
|
222 |
years[1] = years[1].trim(); |
|
223 |
if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){ |
|
224 |
years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1]; |
|
225 |
} |
|
226 |
dtEnd = parseSingleDate(years[1]); |
|
227 |
} |
|
228 |
|
|
229 |
result.setStart(dtStart); |
|
230 |
result.setEnd(dtEnd); |
|
231 |
} catch (IllegalArgumentException e) { |
|
232 |
//logger.warn(e.getMessage()); |
|
233 |
result.setFreeText(periodString); |
|
234 |
} |
|
235 |
} |
|
236 |
} |
|
237 |
|
|
238 |
public static TimePeriod parseString(String strPeriod) { |
|
239 |
TimePeriod timePeriod = TimePeriod.NewInstance(); |
|
240 |
return parseString(timePeriod, strPeriod); |
|
241 |
} |
|
242 |
|
|
243 |
|
|
244 |
protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{ |
|
245 |
//FIXME until now only quick and dirty and incomplete |
|
246 |
Partial partial = new Partial(); |
|
247 |
singleDateString = singleDateString.trim(); |
|
248 |
if (CdmUtils.isNumeric(singleDateString)){ |
|
249 |
try { |
|
250 |
Integer year = Integer.valueOf(singleDateString.trim()); |
|
251 |
if (year < 1000 && year > 2100){ |
|
252 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
253 |
}else if (year < 1700 && year > 2100){ |
|
254 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
255 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
256 |
}else{ |
|
257 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
258 |
} |
|
259 |
} catch (NumberFormatException e) { |
|
260 |
logger.debug("Not a Integer format in getCalendar()"); |
|
261 |
throw new IllegalArgumentException(e); |
|
262 |
} |
|
263 |
}else{ |
|
264 |
throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString); |
|
265 |
} |
|
266 |
return partial; |
|
267 |
|
|
268 |
} |
|
269 |
|
|
270 |
protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{ |
|
271 |
Partial partial = new Partial(); |
|
272 |
singleDateString = singleDateString.trim(); |
|
273 |
String[] split = singleDateString.split("\\."); |
|
274 |
int length = split.length; |
|
275 |
if (length > 3){ |
|
276 |
throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString)); |
|
277 |
} |
|
278 |
String strYear = split[split.length-1]; |
|
279 |
String strMonth = length >= 2? split[split.length-2]: null; |
|
280 |
String strDay = length >= 3? split[split.length-3]: null; |
|
281 |
|
|
282 |
|
|
283 |
try { |
|
284 |
Integer year = Integer.valueOf(strYear.trim()); |
|
285 |
Integer month = Integer.valueOf(strMonth.trim()); |
|
286 |
Integer day = Integer.valueOf(strDay.trim()); |
|
287 |
if (year < 1000 && year > 2100){ |
|
288 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
289 |
}else if (year < 1700 && year > 2100){ |
|
290 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
291 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
292 |
}else{ |
|
293 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
294 |
} |
|
295 |
if (month != null && month != 0){ |
|
296 |
partial = partial.with(TimePeriod.MONTH_TYPE, month); |
|
297 |
} |
|
298 |
if (day != null && day != 0){ |
|
299 |
partial = partial.with(TimePeriod.DAY_TYPE, day); |
|
300 |
} |
|
301 |
} catch (NumberFormatException e) { |
|
302 |
logger.debug("Not a Integer format somewhere in " + singleDateString); |
|
303 |
throw new IllegalArgumentException(e); |
|
304 |
} |
|
305 |
return partial; |
|
306 |
|
|
307 |
} |
|
308 |
|
|
309 |
} |
|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.strategy.parser; |
|
10 |
|
|
11 |
import java.text.DateFormat; |
|
12 |
import java.text.ParsePosition; |
|
13 |
import java.util.Calendar; |
|
14 |
import java.util.Date; |
|
15 |
import java.util.regex.Matcher; |
|
16 |
import java.util.regex.Pattern; |
|
17 |
|
|
18 |
import org.apache.commons.lang.StringUtils; |
|
19 |
import org.apache.log4j.Logger; |
|
20 |
import org.joda.time.DateTime; |
|
21 |
import org.joda.time.DateTimeFieldType; |
|
22 |
import org.joda.time.Partial; |
|
23 |
|
|
24 |
import eu.etaxonomy.cdm.common.CdmUtils; |
|
25 |
import eu.etaxonomy.cdm.common.UTF8; |
|
26 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
27 |
|
|
28 |
/** |
|
29 |
* Class for parsing all types of date string to TimePeriod |
|
30 |
* @author a.mueller |
|
31 |
* @created 14-Jul-2013 |
|
32 |
*/ |
|
33 |
public class TimePeriodParser { |
|
34 |
private static final Logger logger = Logger.getLogger(TimePeriodParser.class); |
|
35 |
|
|
36 |
//patter for first year in string; |
|
37 |
private static final Pattern firstYearPattern = Pattern.compile("\\d{4}"); |
|
38 |
//case "1806"[1807]; |
|
39 |
private static final Pattern uncorrectYearPatter = Pattern.compile("[\""+UTF8.ENGLISH_QUOT_START+"]\\d{4}[\""+UTF8.ENGLISH_QUOT_END+"]\\s*\\[\\d{4}\\]"); |
|
40 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
41 |
private static final Pattern prefixedYearPattern = Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??"); |
|
42 |
//standard |
|
43 |
private static final Pattern standardPattern = Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?)?"); |
|
44 |
private static final String strDotDate = "[0-3]?\\d\\.[01]?\\d\\.\\d{4,4}"; |
|
45 |
private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s?)?", strDotDate, strDotDate); |
|
46 |
private static final Pattern dotDatePattern = Pattern.compile(strDotDatePeriodPattern); |
|
47 |
private static final Pattern lifeSpanPattern = Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern)); |
|
48 |
|
|
49 |
|
|
50 |
public static TimePeriod parseString(TimePeriod timePeriod, String periodString){ |
|
51 |
//TODO move to parser class |
|
52 |
//TODO until now only quick and dirty (and partly wrong) |
|
53 |
TimePeriod result = timePeriod; |
|
54 |
|
|
55 |
if(timePeriod == null){ |
|
56 |
return timePeriod; |
|
57 |
} |
|
58 |
|
|
59 |
if (periodString == null){ |
|
60 |
return result; |
|
61 |
} |
|
62 |
periodString = periodString.trim(); |
|
63 |
|
|
64 |
result.setFreeText(null); |
|
65 |
Date date; |
|
66 |
|
|
67 |
//case "1806"[1807]; |
|
68 |
if (uncorrectYearPatter.matcher(periodString).matches()){ |
|
69 |
result.setFreeText(periodString); |
|
70 |
String realYear = periodString.split("\\[")[1]; |
|
71 |
realYear = realYear.replace("]", ""); |
|
72 |
result.setStartYear(Integer.valueOf(realYear)); |
|
73 |
result.setFreeText(periodString); |
|
74 |
//case fl. 1806 or c. 1806 or fl. 1806? |
|
75 |
}else if(prefixedYearPattern.matcher(periodString).matches()){ |
|
76 |
result.setFreeText(periodString); |
|
77 |
Matcher yearMatcher = firstYearPattern.matcher(periodString); |
|
78 |
yearMatcher.find(); |
|
79 |
String startYear = yearMatcher.group(); |
|
80 |
result.setStartYear(Integer.valueOf(startYear)); |
|
81 |
if (yearMatcher.find()){ |
|
82 |
String endYear = yearMatcher.group(); |
|
83 |
result.setEndYear(Integer.valueOf(endYear)); |
|
84 |
} |
|
85 |
}else if (dotDatePattern.matcher(periodString).matches()){ |
|
86 |
parseDotDatePattern(periodString, result); |
|
87 |
}else if (lifeSpanPattern.matcher(periodString).matches()){ |
|
88 |
parseLifeSpanPattern(periodString, result); |
|
89 |
}else if (standardPattern.matcher(periodString).matches()){ |
|
90 |
parseStandardPattern(periodString, result); |
|
91 |
//TODO first check ambiguity of parser results e.g. for 7/12/11 |
|
92 |
// }else if (isDateString(periodString)){ |
|
93 |
// String[] startEnd = makeStartEnd(periodString); |
|
94 |
// String start = startEnd[0]; |
|
95 |
// DateTime startDateTime = dateStringParse(start, true); |
|
96 |
// result.setStart(startDateTime); |
|
97 |
// if (startEnd.length > 1){ |
|
98 |
// DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
99 |
// ; |
|
100 |
// result.setEnd(endDateTime.toLocalDate()); |
|
101 |
// } |
|
102 |
|
|
103 |
}else{ |
|
104 |
result.setFreeText(periodString); |
|
105 |
} |
|
106 |
return result; |
|
107 |
} |
|
108 |
|
|
109 |
private static boolean isDateString(String periodString) { |
|
110 |
String[] startEnd = makeStartEnd(periodString); |
|
111 |
String start = startEnd[0]; |
|
112 |
DateTime startDateTime = dateStringParse(start, true); |
|
113 |
if (startDateTime == null){ |
|
114 |
return false; |
|
115 |
} |
|
116 |
if (startEnd.length > 1){ |
|
117 |
DateTime endDateTime = dateStringParse(startEnd[1], true); |
|
118 |
if (endDateTime != null){ |
|
119 |
return true; |
|
120 |
} |
|
121 |
} |
|
122 |
return false; |
|
123 |
} |
|
124 |
|
|
125 |
|
|
126 |
/** |
|
127 |
* @param periodString |
|
128 |
* @return |
|
129 |
*/ |
|
130 |
private static String[] makeStartEnd(String periodString) { |
|
131 |
String[] startEnd = new String[]{periodString}; |
|
132 |
if (periodString.contains("-") && periodString.matches("^-{2,}-^-{2,}")){ |
|
133 |
startEnd = periodString.split("-"); |
|
134 |
} |
|
135 |
return startEnd; |
|
136 |
} |
|
137 |
|
|
138 |
|
|
139 |
private static DateTime dateStringParse(String string, boolean strict) { |
|
140 |
DateFormat dateFormat = DateFormat.getDateInstance(); |
|
141 |
ParsePosition pos = new ParsePosition(0); |
|
142 |
Date a = dateFormat.parse(string, pos); |
|
143 |
if (a == null || pos.getIndex() != string.length()){ |
|
144 |
return null; |
|
145 |
} |
|
146 |
Calendar cal = Calendar.getInstance(); |
|
147 |
cal.setTime(a); |
|
148 |
DateTime result = new DateTime(cal); |
|
149 |
return result; |
|
150 |
} |
|
151 |
|
|
152 |
|
|
153 |
/** |
|
154 |
* @param periodString |
|
155 |
* @param result |
|
156 |
*/ |
|
157 |
private static void parseDotDatePattern(String periodString,TimePeriod result) { |
|
158 |
String[] dates = periodString.split("-"); |
|
159 |
Partial dtStart = null; |
|
160 |
Partial dtEnd = null; |
|
161 |
|
|
162 |
if (dates.length > 2 || dates.length <= 0){ |
|
163 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
164 |
result.setFreeText(periodString); |
|
165 |
}else { |
|
166 |
try { |
|
167 |
//start |
|
168 |
if (! StringUtils.isBlank(dates[0])){ |
|
169 |
dtStart = parseSingleDotDate(dates[0].trim()); |
|
170 |
} |
|
171 |
|
|
172 |
//end |
|
173 |
if (dates.length >= 2 && ! StringUtils.isBlank(dates[1])){ |
|
174 |
dtEnd = parseSingleDotDate(dates[1].trim()); |
|
175 |
} |
|
176 |
|
|
177 |
result.setStart(dtStart); |
|
178 |
result.setEnd(dtEnd); |
|
179 |
} catch (IllegalArgumentException e) { |
|
180 |
//logger.warn(e.getMessage()); |
|
181 |
result.setFreeText(periodString); |
|
182 |
} |
|
183 |
} |
|
184 |
} |
|
185 |
|
|
186 |
private static void parseLifeSpanPattern(String periodString, TimePeriod result) { |
|
187 |
|
|
188 |
try{ |
|
189 |
String[] years = periodString.split("--"); |
|
190 |
String start = years[0]; |
|
191 |
String end = years[1]; |
|
192 |
|
|
193 |
result.setStartYear(Integer.valueOf(start)); |
|
194 |
result.setEndYear(Integer.valueOf(end)); |
|
195 |
} catch (Exception e) { |
|
196 |
//logger.warn(e.getMessage()); |
|
197 |
result.setFreeText(periodString); |
|
198 |
} |
|
199 |
} |
|
200 |
|
|
201 |
|
|
202 |
/** |
|
203 |
* @param periodString |
|
204 |
* @param result |
|
205 |
*/ |
|
206 |
private static void parseStandardPattern(String periodString, |
|
207 |
TimePeriod result) { |
|
208 |
String[] years = periodString.split("-"); |
|
209 |
Partial dtStart = null; |
|
210 |
Partial dtEnd = null; |
|
211 |
|
|
212 |
if (years.length > 2 || years.length <= 0){ |
|
213 |
logger.warn("More than 1 '-' in period String: " + periodString); |
|
214 |
}else { |
|
215 |
try { |
|
216 |
//start |
|
217 |
if (! CdmUtils.isEmpty(years[0])){ |
|
218 |
dtStart = parseSingleDate(years[0].trim()); |
|
219 |
} |
|
220 |
|
|
221 |
//end |
|
222 |
if (years.length >= 2 && ! CdmUtils.isEmpty(years[1])){ |
|
223 |
years[1] = years[1].trim(); |
|
224 |
if (years[1].length()==2 && dtStart != null && dtStart.isSupported(DateTimeFieldType.year())){ |
|
225 |
years[1] = String.valueOf(dtStart.get(DateTimeFieldType.year())/100) + years[1]; |
|
226 |
} |
|
227 |
dtEnd = parseSingleDate(years[1]); |
|
228 |
} |
|
229 |
|
|
230 |
result.setStart(dtStart); |
|
231 |
result.setEnd(dtEnd); |
|
232 |
} catch (IllegalArgumentException e) { |
|
233 |
//logger.warn(e.getMessage()); |
|
234 |
result.setFreeText(periodString); |
|
235 |
} |
|
236 |
} |
|
237 |
} |
|
238 |
|
|
239 |
public static TimePeriod parseString(String strPeriod) { |
|
240 |
TimePeriod timePeriod = TimePeriod.NewInstance(); |
|
241 |
return parseString(timePeriod, strPeriod); |
|
242 |
} |
|
243 |
|
|
244 |
|
|
245 |
protected static Partial parseSingleDate(String singleDateString) throws IllegalArgumentException{ |
|
246 |
//FIXME until now only quick and dirty and incomplete |
|
247 |
Partial partial = new Partial(); |
|
248 |
singleDateString = singleDateString.trim(); |
|
249 |
if (CdmUtils.isNumeric(singleDateString)){ |
|
250 |
try { |
|
251 |
Integer year = Integer.valueOf(singleDateString.trim()); |
|
252 |
if (year < 1000 && year > 2100){ |
|
253 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
254 |
}else if (year < 1700 && year > 2100){ |
|
255 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
256 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
257 |
}else{ |
|
258 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
259 |
} |
|
260 |
} catch (NumberFormatException e) { |
|
261 |
logger.debug("Not a Integer format in getCalendar()"); |
|
262 |
throw new IllegalArgumentException(e); |
|
263 |
} |
|
264 |
}else{ |
|
265 |
throw new IllegalArgumentException("Until now only years can be parsed as single dates. But date is: " + singleDateString); |
|
266 |
} |
|
267 |
return partial; |
|
268 |
|
|
269 |
} |
|
270 |
|
|
271 |
protected static Partial parseSingleDotDate(String singleDateString) throws IllegalArgumentException{ |
|
272 |
Partial partial = new Partial(); |
|
273 |
singleDateString = singleDateString.trim(); |
|
274 |
String[] split = singleDateString.split("\\."); |
|
275 |
int length = split.length; |
|
276 |
if (length > 3){ |
|
277 |
throw new IllegalArgumentException(String.format("More than 2 dots in date '%s'", singleDateString)); |
|
278 |
} |
|
279 |
String strYear = split[split.length-1]; |
|
280 |
String strMonth = length >= 2? split[split.length-2]: null; |
|
281 |
String strDay = length >= 3? split[split.length-3]: null; |
|
282 |
|
|
283 |
|
|
284 |
try { |
|
285 |
Integer year = Integer.valueOf(strYear.trim()); |
|
286 |
Integer month = Integer.valueOf(strMonth.trim()); |
|
287 |
Integer day = Integer.valueOf(strDay.trim()); |
|
288 |
if (year < 1000 && year > 2100){ |
|
289 |
logger.warn("Not a valid year: " + year + ". Year must be between 1000 and 2100"); |
|
290 |
}else if (year < 1700 && year > 2100){ |
|
291 |
logger.warn("Not a valid taxonomic year: " + year + ". Year must be between 1750 and 2100"); |
|
292 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
293 |
}else{ |
|
294 |
partial = partial.with(TimePeriod.YEAR_TYPE, year); |
|
295 |
} |
|
296 |
if (month != null && month != 0){ |
|
297 |
partial = partial.with(TimePeriod.MONTH_TYPE, month); |
|
298 |
} |
|
299 |
if (day != null && day != 0){ |
|
300 |
partial = partial.with(TimePeriod.DAY_TYPE, day); |
|
301 |
} |
|
302 |
} catch (NumberFormatException e) { |
|
303 |
logger.debug("Not a Integer format somewhere in " + singleDateString); |
|
304 |
throw new IllegalArgumentException(e); |
|
305 |
} |
|
306 |
return partial; |
|
307 |
|
|
308 |
} |
|
309 |
|
|
310 |
} |
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplTest.java | ||
---|---|---|
1694 | 1694 |
|
1695 | 1695 |
} |
1696 | 1696 |
|
1697 |
@Test |
|
1698 |
public final void testExistingProblems(){ |
|
1699 |
//Canabio, issue with space |
|
1700 |
NonViralName<?> name = parser.parseReferencedName("Machaonia erythrocarpa var. hondurensis (Standl.) Borhidi" |
|
1701 |
+ " in Acta Bot. Hung. 46 (1-2): 30. 2004"); |
|
1702 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1703 |
TeamOrPersonBase<?> combinationAuthor = name.getCombinationAuthorship(); |
|
1704 |
assertEquals( "Borhidi", combinationAuthor.getNomenclaturalTitle()); |
|
1705 |
Reference nomRef = (Reference)name.getNomenclaturalReference(); |
|
1706 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1707 |
assertEquals("46 (1-2)", nomRef.getVolume()); |
|
1708 |
|
|
1709 |
//Canabio, detail with fig. |
|
1710 |
name = parser.parseReferencedName("Didymaea floribunda Rzed." |
|
1711 |
+ " in Bol. Soc. Bot. Méx. 44: 72, fig. 1. 1983"); |
|
1712 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1713 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1714 |
assertEquals( "Rzed.", combinationAuthor.getNomenclaturalTitle()); |
|
1715 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1716 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1717 |
assertEquals("44", nomRef.getVolume()); |
|
1718 |
assertEquals("72, fig. 1", name.getNomenclaturalMicroReference()); |
|
1719 |
|
|
1720 |
//fig with a-c and without dot |
|
1721 |
name = parser.parseReferencedName("Deppea guerrerensis Dwyer & Lorence" |
|
1722 |
+ " in Allertonia 4: 428. fig 4a-c. 1988"); // |
|
1723 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1724 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1725 |
assertEquals( "Dwyer & Lorence", combinationAuthor.getNomenclaturalTitle()); |
|
1726 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1727 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1728 |
assertEquals("4", nomRef.getVolume()); |
|
1729 |
assertEquals("428. fig 4a-c", name.getNomenclaturalMicroReference()); |
|
1730 |
|
|
1731 |
//issue with EN_DASH (3–4) |
|
1732 |
name = parser.parseReferencedName("Arachnothryx tacanensis (Lundell) Borhidi" |
|
1733 |
+ " in Acta Bot. Hung. 33 (3–4): 303. 1987"); |
|
1734 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1735 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1736 |
assertEquals( "Borhidi", combinationAuthor.getNomenclaturalTitle()); |
|
1737 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1738 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1739 |
assertEquals("33 (3–4)", nomRef.getVolume()); |
|
1740 |
assertEquals("303", name.getNomenclaturalMicroReference()); |
|
1741 |
|
|
1742 |
//fig with f. |
|
1743 |
name = parser.parseReferencedName("Stenotis Terrell" |
|
1744 |
+ " in Sida 19(4): 901–911, f. 1–2. 2001"); |
|
1745 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1746 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1747 |
assertEquals( "Terrell", combinationAuthor.getNomenclaturalTitle()); |
|
1748 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1749 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1750 |
assertEquals("19(4)", nomRef.getVolume()); |
|
1751 |
assertEquals("901–911, f. 1–2", name.getNomenclaturalMicroReference()); |
|
1752 |
|
|
1753 |
//pl |
|
1754 |
name = parser.parseReferencedName("Carapichea Aubl." |
|
1755 |
+ " in Hist. Pl. Guiane 1: 167, pl. 64. 1775"); |
|
1756 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1757 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1758 |
assertEquals( "Aubl.", combinationAuthor.getNomenclaturalTitle()); |
|
1759 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1760 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1761 |
assertEquals("1", nomRef.getVolume()); |
|
1762 |
assertEquals("167, pl. 64", name.getNomenclaturalMicroReference()); |
|
1763 |
|
|
1764 |
//fig with , |
|
1765 |
name = parser.parseReferencedName("Hoffmannia ixtlanensis Lorence" |
|
1766 |
+ " in Novon 4: 121. fig. 2a, b. 1994"); |
|
1767 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1768 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1769 |
assertEquals( "Lorence", combinationAuthor.getNomenclaturalTitle()); |
|
1770 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1771 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1772 |
assertEquals("4", nomRef.getVolume()); |
|
1773 |
assertEquals("121. fig. 2a, b", name.getNomenclaturalMicroReference()); |
|
1774 |
|
|
1775 |
//(Suppl.) |
|
1776 |
name = parser.parseReferencedName("Manettia costaricensis Wernham" |
|
1777 |
+ " in J. Bot. 57(Suppl.): 38. 1919"); |
|
1778 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1779 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1780 |
assertEquals( "Wernham", combinationAuthor.getNomenclaturalTitle()); |
|
1781 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1782 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1783 |
assertEquals("57(Suppl.)", nomRef.getVolume()); |
|
1784 |
assertEquals("38", name.getNomenclaturalMicroReference()); |
|
1785 |
|
|
1786 |
//NY. |
|
1787 |
name = parser.parseReferencedName("Crusea psyllioides (Kunth) W.R. Anderson" |
|
1788 |
+ " in Mem. NY. Bot. Gard. 22: 75. 1972"); |
|
1789 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1790 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1791 |
assertEquals( "W.R. Anderson", combinationAuthor.getNomenclaturalTitle()); |
|
1792 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1793 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1794 |
assertEquals("22", nomRef.getVolume()); |
|
1795 |
assertEquals("75", name.getNomenclaturalMicroReference()); |
|
1796 |
|
|
1797 |
//apostroph word in title |
|
1798 |
name = parser.parseReferencedName("Sabicea glabrescens Benth." |
|
1799 |
+ " in Hooker's J. Bot. Kew Gard. Misc. 3: 219. 1841"); |
|
1800 |
Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1801 |
combinationAuthor = name.getCombinationAuthorship(); |
|
1802 |
assertEquals( "Benth.", combinationAuthor.getNomenclaturalTitle()); |
|
1803 |
nomRef = (Reference)name.getNomenclaturalReference(); |
|
1804 |
assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1805 |
assertEquals("3", nomRef.getVolume()); |
|
1806 |
assertEquals("219", name.getNomenclaturalMicroReference()); |
|
1807 |
|
|
1808 |
// |
|
1809 |
// //(Hannover) place published |
|
1810 |
// name = parser.parseReferencedName("Pittoniotis trichantha Griseb." |
|
1811 |
// + " in Bonplandia (Hannover) 6 (1): 8. 1858"); |
|
1812 |
// Assert.assertFalse("Name should be parsable", name.isProtectedTitleCache()); |
|
1813 |
// combinationAuthor = name.getCombinationAuthorship(); |
|
1814 |
// assertEquals( "Griseb.", combinationAuthor.getNomenclaturalTitle()); |
|
1815 |
// nomRef = (Reference)name.getNomenclaturalReference(); |
|
1816 |
// assertEquals(ReferenceType.Article, nomRef.getType()); |
|
1817 |
// assertEquals("6 (1)", nomRef.getVolume()); |
|
1818 |
// assertEquals("8", name.getNomenclaturalMicroReference()); |
|
1819 |
} |
|
1820 |
|
|
1697 | 1821 |
} |
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParserTest.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2009 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.strategy.parser; |
|
11 |
|
|
12 |
import static org.junit.Assert.assertFalse; |
|
13 |
import static org.junit.Assert.assertNotNull; |
|
14 |
import static org.junit.Assert.assertNull; |
|
15 |
import static org.junit.Assert.assertTrue; |
|
16 |
import org.junit.Assert; |
|
17 |
|
|
18 |
import org.apache.log4j.Logger; |
|
19 |
import org.joda.time.DateTimeFieldType; |
|
20 |
import org.joda.time.Partial; |
|
21 |
import org.junit.After; |
|
22 |
import org.junit.AfterClass; |
|
23 |
import org.junit.Before; |
|
24 |
import org.junit.BeforeClass; |
|
25 |
import org.junit.Test; |
|
26 |
|
|
27 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
28 |
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser; |
|
29 |
|
|
30 |
/** |
|
31 |
* @author a.mueller |
|
32 |
* |
|
33 |
*/ |
|
34 |
public class TimePeriodParserTest { |
|
35 |
private static final Logger logger = Logger.getLogger(TimePeriodParserTest.class); |
|
36 |
|
|
37 |
private TimePeriod onlyStartYear; |
|
38 |
private TimePeriod onlyEndYear; |
|
39 |
private TimePeriod startAndEndYear; |
|
40 |
private TimePeriod noStartAndEndYear; |
|
41 |
|
|
42 |
|
|
43 |
/** |
|
44 |
* @throws java.lang.Exception |
|
45 |
*/ |
|
46 |
@BeforeClass |
|
47 |
public static void setUpBeforeClass() throws Exception { |
|
48 |
} |
|
49 |
|
|
50 |
/** |
|
51 |
* @throws java.lang.Exception |
|
52 |
*/ |
|
53 |
@AfterClass |
|
54 |
public static void tearDownAfterClass() throws Exception { |
|
55 |
} |
|
56 |
|
|
57 |
/** |
|
58 |
* @throws java.lang.Exception |
|
59 |
*/ |
|
60 |
@Before |
|
61 |
public void setUp() throws Exception { |
|
62 |
onlyStartYear = TimePeriod.NewInstance(1922); |
|
63 |
onlyEndYear = TimePeriod.NewInstance(null, 1857);; |
|
64 |
startAndEndYear = TimePeriod.NewInstance(1931, 1957); |
|
65 |
Integer start = null; |
|
66 |
Integer end = null; |
|
67 |
noStartAndEndYear = TimePeriod.NewInstance(start, end);; |
|
68 |
} |
|
69 |
|
|
70 |
/** |
|
71 |
* @throws java.lang.Exception |
|
72 |
*/ |
|
73 |
@After |
|
74 |
public void tearDown() throws Exception { |
|
75 |
} |
|
76 |
|
|
77 |
|
|
78 |
//************************ TESTS ****************************************** |
|
79 |
|
|
80 |
|
|
81 |
@Test |
|
82 |
public void testParseSingleDateString() { |
|
83 |
String strDate = "1756"; |
|
84 |
Partial date = TimePeriodParser.parseSingleDate(strDate); |
|
85 |
assertNotNull(date); |
|
86 |
Assert.assertEquals(Integer.parseInt(strDate), date.get(DateTimeFieldType.year())); |
|
87 |
try { |
|
88 |
date.get(DateTimeFieldType.monthOfYear()); |
|
89 |
assertFalse(true); //should not be reached |
|
90 |
} catch (Exception e) { |
|
91 |
assertTrue(e instanceof IllegalArgumentException); |
|
92 |
} |
|
93 |
try { |
|
94 |
date.get(DateTimeFieldType.dayOfMonth()); |
|
95 |
assertFalse(true); //should not be reached |
|
96 |
} catch (Exception e) { |
|
97 |
assertTrue(e instanceof IllegalArgumentException); |
|
98 |
} |
|
99 |
//to be continued |
|
100 |
} |
|
101 |
|
|
102 |
|
|
103 |
/** |
|
104 |
* Test method for {@link eu.etaxonomy.cdm.model.common.TimePeriod#parseString(java.lang.String)}. |
|
105 |
*/ |
|
106 |
@Test |
|
107 |
public void testParseStringString() { |
|
108 |
String strTimePeriod = "1756"; |
|
109 |
TimePeriod tp1 = TimePeriodParser.parseString(strTimePeriod); |
|
110 |
assertNotNull(tp1); |
|
111 |
Assert.assertEquals(strTimePeriod, tp1.getYear()); |
|
112 |
Assert.assertEquals(strTimePeriod, String.valueOf(tp1.getStartYear())); |
|
113 |
assertNull(tp1.getEnd()); |
|
114 |
assertNull(tp1.getStartMonth()); |
|
115 |
strTimePeriod = "1756-88"; |
|
116 |
tp1 = TimePeriodParser.parseString(strTimePeriod); |
|
117 |
assertNotNull(tp1); |
|
118 |
Assert.assertEquals("1756-1788", tp1.getYear()); |
|
119 |
Assert.assertEquals("1756", String.valueOf(tp1.getStartYear())); |
|
120 |
Assert.assertEquals("1788", String.valueOf(tp1.getEndYear())); |
|
121 |
assertNull(tp1.getEndMonth()); |
|
122 |
assertNull(tp1.getStartMonth()); |
|
123 |
//unparsable |
|
124 |
String strUnparsablePeriod = "wef 1809-78"; |
|
125 |
TimePeriod tpUnparsable = TimePeriodParser.parseString(strUnparsablePeriod); |
|
126 |
assertNotNull(tpUnparsable); |
|
127 |
Assert.assertEquals(strUnparsablePeriod, tpUnparsable.getFreeText()); |
|
128 |
|
|
129 |
//"1806"[1807] |
|
130 |
String strCorrectedPeriod = "\"1806\"[1807]"; |
|
131 |
TimePeriod tpcorrected = TimePeriodParser.parseString(strCorrectedPeriod); |
|
132 |
assertNotNull(tpcorrected); |
|
133 |
Assert.assertEquals(strCorrectedPeriod, tpcorrected.getFreeText()); |
|
134 |
Assert.assertEquals("1807", tpcorrected.getYear()); |
|
135 |
|
|
136 |
|
|
137 |
//fl. 1806 |
|
138 |
String strFlPeriod = "fl. 1806?"; |
|
139 |
TimePeriod tpFl = TimePeriodParser.parseString(strFlPeriod); |
|
140 |
assertNotNull(tpFl); |
|
141 |
Assert.assertEquals(strFlPeriod, tpFl.getFreeText()); |
|
142 |
Assert.assertEquals("1806", tpFl.getYear()); |
|
143 |
|
|
144 |
String strCPeriod = "c. 1806-1810"; |
|
145 |
TimePeriod tpC = TimePeriodParser.parseString(strCPeriod); |
|
146 |
assertNotNull(tpC); |
|
147 |
Assert.assertEquals(strCPeriod, tpC.getFreeText()); |
|
148 |
Assert.assertEquals(Integer.valueOf(1806), tpC.getStartYear()); |
|
149 |
Assert.assertEquals(Integer.valueOf(1810), tpC.getEndYear()); |
|
150 |
Assert.assertEquals("1806-1810", tpC.getYear()); |
|
151 |
|
|
152 |
//1.1.2011 |
|
153 |
String strDotDate = "1.2.2011"; |
|
154 |
TimePeriod tp = TimePeriodParser.parseString(strDotDate); |
|
155 |
assertNotNull(tp); |
|
156 |
Assert.assertEquals(strDotDate, tp.toString()); |
|
157 |
Assert.assertEquals("2011", tp.getYear()); |
|
158 |
Assert.assertEquals(Integer.valueOf(2), tp.getStartMonth()); |
|
159 |
Assert.assertEquals(Integer.valueOf(1), tp.getStartDay()); |
|
160 |
|
|
161 |
strDotDate = "31.03.2012"; |
|
162 |
tp = TimePeriodParser.parseString(strDotDate); |
|
163 |
assertNotNull(tp); |
|
164 |
Assert.assertEquals("31.3.2012", tp.toString()); |
|
165 |
Assert.assertEquals("2012", tp.getYear()); |
|
166 |
Assert.assertEquals(Integer.valueOf(3), tp.getStartMonth()); |
|
167 |
Assert.assertEquals(Integer.valueOf(31), tp.getStartDay()); |
|
168 |
|
|
169 |
strDotDate = "00.04.2013"; |
|
170 |
tp = TimePeriodParser.parseString(strDotDate); |
|
171 |
assertNotNull(tp); |
|
172 |
Assert.assertEquals("4.2013", tp.toString()); |
|
173 |
Assert.assertEquals("2013", tp.getYear()); |
|
174 |
Assert.assertEquals(Integer.valueOf(4), tp.getStartMonth()); |
|
175 |
Assert.assertEquals(null, tp.getStartDay()); |
|
176 |
|
|
177 |
strDotDate = "13.00.2014"; |
|
178 |
tp = TimePeriodParser.parseString(strDotDate); |
|
179 |
assertNotNull(tp); |
|
180 |
Assert.assertEquals("13.xx.2014", tp.toString()); |
|
181 |
Assert.assertEquals("2014", tp.getYear()); |
|
182 |
Assert.assertEquals(null, tp.getStartMonth()); |
|
183 |
Assert.assertEquals(Integer.valueOf(13), tp.getStartDay()); |
|
184 |
|
|
185 |
strDotDate = "31.12.2015 - 02.01.2016"; |
|
186 |
tp = TimePeriodParser.parseString(strDotDate); |
|
187 |
assertNotNull(tp); |
|
188 |
Assert.assertEquals("31.12.2015-2.1.2016", tp.toString()); |
|
189 |
Assert.assertEquals("2015-2016", tp.getYear()); |
|
190 |
Assert.assertEquals(Integer.valueOf(2015), tp.getStartYear()); |
|
191 |
Assert.assertEquals(Integer.valueOf(12), tp.getStartMonth()); |
|
192 |
Assert.assertEquals(Integer.valueOf(31), tp.getStartDay()); |
|
193 |
Assert.assertEquals(Integer.valueOf(2016), tp.getEndYear()); |
|
194 |
Assert.assertEquals(Integer.valueOf(1), tp.getEndMonth()); |
|
195 |
Assert.assertEquals(Integer.valueOf(2), tp.getEndDay()); |
|
196 |
} |
|
197 |
|
|
198 |
|
|
199 |
} |
|
1 |
/** |
|
2 |
* Copyright (C) 2009 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
|
|
10 |
package eu.etaxonomy.cdm.strategy.parser; |
|
11 |
|
|
12 |
import static org.junit.Assert.assertFalse; |
|
13 |
import static org.junit.Assert.assertNotNull; |
|
14 |
import static org.junit.Assert.assertNull; |
|
15 |
import static org.junit.Assert.assertTrue; |
|
16 |
|
|
17 |
import org.apache.log4j.Logger; |
|
18 |
import org.joda.time.DateTimeFieldType; |
|
19 |
import org.joda.time.Partial; |
|
20 |
import org.junit.After; |
|
21 |
import org.junit.AfterClass; |
|
22 |
import org.junit.Assert; |
|
23 |
import org.junit.Before; |
|
24 |
import org.junit.BeforeClass; |
|
25 |
import org.junit.Test; |
|
26 |
|
|
27 |
import eu.etaxonomy.cdm.common.UTF8; |
|
28 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
29 |
|
|
30 |
/** |
|
31 |
* @author a.mueller |
|
32 |
* |
|
33 |
*/ |
|
34 |
public class TimePeriodParserTest { |
|
35 |
private static final Logger logger = Logger.getLogger(TimePeriodParserTest.class); |
|
36 |
|
|
37 |
private TimePeriod onlyStartYear; |
|
38 |
private TimePeriod onlyEndYear; |
|
39 |
private TimePeriod startAndEndYear; |
|
40 |
private TimePeriod noStartAndEndYear; |
|
41 |
|
|
42 |
|
|
43 |
/** |
|
44 |
* @throws java.lang.Exception |
|
45 |
*/ |
|
46 |
@BeforeClass |
|
47 |
public static void setUpBeforeClass() throws Exception { |
|
48 |
} |
|
49 |
|
|
50 |
/** |
|
51 |
* @throws java.lang.Exception |
|
52 |
*/ |
|
53 |
@AfterClass |
|
54 |
public static void tearDownAfterClass() throws Exception { |
|
55 |
} |
|
56 |
|
|
57 |
/** |
|
58 |
* @throws java.lang.Exception |
|
59 |
*/ |
|
60 |
@Before |
|
61 |
public void setUp() throws Exception { |
|
62 |
onlyStartYear = TimePeriod.NewInstance(1922); |
|
63 |
onlyEndYear = TimePeriod.NewInstance(null, 1857); |
|
64 |
startAndEndYear = TimePeriod.NewInstance(1931, 1957); |
|
65 |
Integer start = null; |
|
66 |
Integer end = null; |
|
67 |
noStartAndEndYear = TimePeriod.NewInstance(start, end); |
|
68 |
} |
|
69 |
|
|
70 |
/** |
|
71 |
* @throws java.lang.Exception |
|
72 |
*/ |
|
73 |
@After |
|
74 |
public void tearDown() throws Exception { |
|
75 |
} |
|
76 |
|
|
77 |
|
|
78 |
//************************ TESTS ****************************************** |
|
79 |
|
|
80 |
|
|
81 |
@Test |
|
82 |
public void testParseSingleDateString() { |
|
83 |
String strDate = "1756"; |
|
84 |
Partial date = TimePeriodParser.parseSingleDate(strDate); |
|
85 |
assertNotNull(date); |
|
86 |
Assert.assertEquals(Integer.parseInt(strDate), date.get(DateTimeFieldType.year())); |
|
87 |
try { |
|
88 |
date.get(DateTimeFieldType.monthOfYear()); |
|
89 |
assertFalse(true); //should not be reached |
|
90 |
} catch (Exception e) { |
|
91 |
assertTrue(e instanceof IllegalArgumentException); |
|
92 |
} |
|
93 |
try { |
|
94 |
date.get(DateTimeFieldType.dayOfMonth()); |
|
95 |
assertFalse(true); //should not be reached |
|
96 |
} catch (Exception e) { |
|
97 |
assertTrue(e instanceof IllegalArgumentException); |
|
98 |
} |
|
99 |
//to be continued |
|
100 |
} |
|
101 |
|
|
102 |
|
|
103 |
/** |
|
104 |
* Test method for {@link eu.etaxonomy.cdm.model.common.TimePeriod#parseString(java.lang.String)}. |
|
105 |
*/ |
|
106 |
@Test |
|
107 |
public void testParseStringString() { |
|
108 |
String strTimePeriod = "1756"; |
|
109 |
TimePeriod tp1 = TimePeriodParser.parseString(strTimePeriod); |
|
110 |
assertNotNull(tp1); |
|
111 |
Assert.assertEquals(strTimePeriod, tp1.getYear()); |
|
112 |
Assert.assertEquals(strTimePeriod, String.valueOf(tp1.getStartYear())); |
|
113 |
assertNull(tp1.getEnd()); |
|
114 |
assertNull(tp1.getStartMonth()); |
|
115 |
strTimePeriod = "1756-88"; |
|
116 |
tp1 = TimePeriodParser.parseString(strTimePeriod); |
|
117 |
assertNotNull(tp1); |
|
118 |
Assert.assertEquals("1756-1788", tp1.getYear()); |
|
119 |
Assert.assertEquals("1756", String.valueOf(tp1.getStartYear())); |
|
120 |
Assert.assertEquals("1788", String.valueOf(tp1.getEndYear())); |
|
121 |
assertNull(tp1.getEndMonth()); |
|
122 |
assertNull(tp1.getStartMonth()); |
|
123 |
//unparsable |
|
124 |
String strUnparsablePeriod = "wef 1809-78"; |
|
125 |
TimePeriod tpUnparsable = TimePeriodParser.parseString(strUnparsablePeriod); |
|
126 |
assertNotNull(tpUnparsable); |
|
127 |
Assert.assertEquals(strUnparsablePeriod, tpUnparsable.getFreeText()); |
|
128 |
|
|
129 |
//"1806"[1807] |
|
130 |
String strCorrectedPeriod = "\"1806\"[1807]"; |
|
131 |
TimePeriod tpcorrected = TimePeriodParser.parseString(strCorrectedPeriod); |
|
132 |
assertNotNull(tpcorrected); |
|
133 |
Assert.assertEquals(strCorrectedPeriod, tpcorrected.getFreeText()); |
|
134 |
Assert.assertEquals("1807", tpcorrected.getYear()); |
|
135 |
|
|
136 |
//„1806‟[1807] |
|
137 |
String strCorrectedEnPeriod = UTF8.ENGLISH_QUOT_START + "1806"+UTF8.ENGLISH_QUOT_END+"[1807]"; |
|
138 |
TimePeriod tpcorrectedEn = TimePeriodParser.parseString(strCorrectedEnPeriod); |
|
139 |
assertNotNull(tpcorrectedEn); |
|
140 |
Assert.assertEquals(strCorrectedEnPeriod, tpcorrectedEn.getFreeText()); |
|
141 |
Assert.assertEquals("1807", tpcorrectedEn.getYear()); |
|
142 |
|
|
143 |
|
|
144 |
//fl. 1806 |
|
145 |
String strFlPeriod = "fl. 1806?"; |
|
146 |
TimePeriod tpFl = TimePeriodParser.parseString(strFlPeriod); |
|
147 |
assertNotNull(tpFl); |
|
148 |
Assert.assertEquals(strFlPeriod, tpFl.getFreeText()); |
|
149 |
Assert.assertEquals("1806", tpFl.getYear()); |
|
150 |
|
|
151 |
String strCPeriod = "c. 1806-1810"; |
|
152 |
TimePeriod tpC = TimePeriodParser.parseString(strCPeriod); |
|
153 |
assertNotNull(tpC); |
|
154 |
Assert.assertEquals(strCPeriod, tpC.getFreeText()); |
|
155 |
Assert.assertEquals(Integer.valueOf(1806), tpC.getStartYear()); |
|
156 |
Assert.assertEquals(Integer.valueOf(1810), tpC.getEndYear()); |
|
157 |
Assert.assertEquals("1806-1810", tpC.getYear()); |
|
158 |
|
|
159 |
//1.1.2011 |
|
160 |
String strDotDate = "1.2.2011"; |
|
161 |
TimePeriod tp = TimePeriodParser.parseString(strDotDate); |
|
162 |
assertNotNull(tp); |
|
163 |
Assert.assertEquals(strDotDate, tp.toString()); |
|
164 |
Assert.assertEquals("2011", tp.getYear()); |
|
165 |
Assert.assertEquals(Integer.valueOf(2), tp.getStartMonth()); |
|
166 |
Assert.assertEquals(Integer.valueOf(1), tp.getStartDay()); |
|
167 |
|
|
168 |
strDotDate = "31.03.2012"; |
|
169 |
tp = TimePeriodParser.parseString(strDotDate); |
|
170 |
assertNotNull(tp); |
|
171 |
Assert.assertEquals("31.3.2012", tp.toString()); |
|
172 |
Assert.assertEquals("2012", tp.getYear()); |
|
173 |
Assert.assertEquals(Integer.valueOf(3), tp.getStartMonth()); |
|
174 |
Assert.assertEquals(Integer.valueOf(31), tp.getStartDay()); |
|
175 |
|
|
176 |
strDotDate = "00.04.2013"; |
|
177 |
tp = TimePeriodParser.parseString(strDotDate); |
|
178 |
assertNotNull(tp); |
|
179 |
Assert.assertEquals("4.2013", tp.toString()); |
|
180 |
Assert.assertEquals("2013", tp.getYear()); |
|
181 |
Assert.assertEquals(Integer.valueOf(4), tp.getStartMonth()); |
|
182 |
Assert.assertEquals(null, tp.getStartDay()); |
|
183 |
|
|
184 |
strDotDate = "13.00.2014"; |
|
185 |
tp = TimePeriodParser.parseString(strDotDate); |
|
186 |
assertNotNull(tp); |
|
187 |
Assert.assertEquals("13.xx.2014", tp.toString()); |
|
188 |
Assert.assertEquals("2014", tp.getYear()); |
|
189 |
Assert.assertEquals(null, tp.getStartMonth()); |
|
190 |
Assert.assertEquals(Integer.valueOf(13), tp.getStartDay()); |
|
191 |
|
|
192 |
strDotDate = "31.12.2015 - 02.01.2016"; |
|
193 |
tp = TimePeriodParser.parseString(strDotDate); |
|
194 |
assertNotNull(tp); |
|
195 |
Assert.assertEquals("31.12.2015-2.1.2016", tp.toString()); |
|
196 |
Assert.assertEquals("2015-2016", tp.getYear()); |
|
197 |
Assert.assertEquals(Integer.valueOf(2015), tp.getStartYear()); |
|
198 |
Assert.assertEquals(Integer.valueOf(12), tp.getStartMonth()); |
|
199 |
Assert.assertEquals(Integer.valueOf(31), tp.getStartDay()); |
|
200 |
Assert.assertEquals(Integer.valueOf(2016), tp.getEndYear()); |
|
201 |
Assert.assertEquals(Integer.valueOf(1), tp.getEndMonth()); |
|
202 |
Assert.assertEquals(Integer.valueOf(2), tp.getEndDay()); |
|
203 |
} |
|
204 |
|
|
205 |
|
|
206 |
} |
Also available in: Unified diff
#5909 Improve referenced name parser