Project

General

Profile

Revision 4b24820e

ID4b24820eeb15cba02249146627ac18b48187bdbb
Parent 2f4f765e
Child 8f08a3ae

Added by Andreas Müller 5 months ago

ref #9071 improve with-month-name period parsing

View differences:

cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParser.java
46 46
//	private static final Pattern uncorrectYearPatter = Pattern.compile(NonViralNameParserImplRegExBase.incorrectYearPhrase);
47 47

  
48 48
	//case fl. 1806 or c. 1806 or fl. 1806?
49
	private static final Pattern prefixedYearPattern =  Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*-\\s*\\d{4})?\\??");
49
	private static final Pattern prefixedYearPattern =  Pattern.compile("(fl|c)\\.\\s*\\d{4}(\\s*"+SEP+"\\s*\\d{4})?\\??");
50 50
	//standard
51
	private static final Pattern standardPattern =  Pattern.compile("\\s*\\d{2,4}(\\s*-(\\s*\\d{2,4})?|\\+)?");
51
	private static final Pattern standardPattern =  Pattern.compile("\\s*\\d{2,4}(\\s*"+SEP+"(\\s*\\d{2,4})?|\\+)?");
52

  
52 53
	private static final String strDotDate = strDay + "\\.[01]?\\d\\.\\d{4,4}";
53
	private static final String strDotDatePeriodPattern = String.format("%s(\\s*-\\s*%s|\\+)?", strDotDate, strDotDate);
54
	private static final Pattern dotDatePattern =  Pattern.compile(strDotDatePeriodPattern);
54
	// Period of two dot-separated dates (or an open-ended "+" period). Uses the shared SEP
	// range separator, consistent with the slash-date and year patterns in this class.
	private static final String strDotDatePeriod = String.format("%s(\\s*"+SEP+"\\s*%s|\\+)?", strDotDate, strDotDate);
55
	private static final Pattern dotDatePattern =  Pattern.compile(strDotDatePeriod);
56

  
55 57
	private static final String strSlashDate = strDay + "\\/[01]?\\d\\/\\d{4,4}";
56
	private static final String strSlashDatePeriodPattern = String.format("%s(\\s*-\\s*%s|\\+)?", strSlashDate, strSlashDate);
57
	private static final Pattern slashDatePattern =  Pattern.compile(strSlashDatePeriodPattern);
58
	private static final String strSlashDatePeriod = String.format("%s(\\s*"+SEP+"\\s*%s|\\+)?", strSlashDate, strSlashDate);
59
	private static final Pattern slashDatePattern =  Pattern.compile(strSlashDatePeriod);
60

  
58 61
	private static final Pattern lifeSpanPattern =  Pattern.compile(String.format("%s--%s", firstYearPattern, firstYearPattern));
62

  
59 63
	// Month names and abbreviations. The first alternative may carry a trailing dot, the second may not.
	// "Ma([yi])" is a character class inside a group: it accepts "May" and "Mai". The former "Ma(yi)"
	// only matched the literal text "Mayi"; the group is kept so capture-group numbering stays unchanged.
	private static final String strMonthes = "((Jan|Feb|Aug|Sept?|Oct(ober)?|Nov|Dec)\\.?|(Mar(ch)?|Apr(il)?|Ma([yi])|June?|July?))";
60
	public static final String strDateWithMonthes = "("+ strDay + dotOrWs + ")?" + strMonthes + dotOrWs + "\\d{4,4}\\+?";
61
	public static final String strStartDateWithMonthes = "(" + strDay + "|(" + strDay + dotOrWs + ")?" + strMonthes + ")(" + dotOrWs + "\\d{4,4})?";
62
	public static final String strDateWithMonthesPeriod = "("+strStartDateWithMonthes +SEP+")?" + strDateWithMonthes;
64
	public static final String strDateWithMonthesPeriod = "(("+ strDay + "|(" + strDay + dotOrWs +")?" + strMonthes + "|((" + strDay + dotOrWs +")?" + strMonthes  + dotOrWs + ")?" + "\\d{4,4})" + SEP + ")?" +
65
	        "(("+ strDay + dotOrWs + ")?" + strMonthes + dotOrWs + ")?\\d{4,4}\\+?";
63 66
    private static final Pattern dateWithMonthNamePattern = Pattern.compile(strDateWithMonthesPeriod);
67

  
64 68
    private static final String strDateYearMonthDay = "(\\d{4,4}" + dashOrWs + ")?" + strMonthes + "(" + dashOrWs + "[0-3]?\\d)?\\+?";
65 69
	private static final Pattern dateYearMonthDayPattern = Pattern.compile(strDateYearMonthDay);
66 70

  
67 71
	public static <T extends TimePeriod> T parseString(T timePeriod, String periodString){
68
		//TODO until now only quick and dirty (and partly wrong)
69 72
		T result = timePeriod;
70 73

  
71 74
		if(timePeriod == null){
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/TimePeriodParserTest.java
320 320
	}
321 321

  
322 322
    /**
     * Checks parsing of periods whose start and/or end date contains a month name.
     * Three inputs are parsed with {@code TimePeriodParser.parseString(String)} and,
     * for each, the string representation, the year string and the individual
     * start/end year, month and day values are compared with the expected values.
     */
    @Test
323
    public void testParseDateWithMonthPeriods() {
324
        // Case 1: full day-month-year date on both sides, separated by a plain hyphen in the input;
        // the expected toString() output uses an en dash instead.
        String strDate = "24 Aug 1957-14 Oct 1988";
325
        TimePeriod tp = TimePeriodParser.parseString(strDate);
326
        assertNotNull(tp);
327
        Assert.assertEquals("24 Aug 1957–14 Oct 1988", tp.toString());
328
        Assert.assertEquals("1957–1988", tp.getYear());
329
        Assert.assertEquals(Integer.valueOf(1957), tp.getStartYear());
330
        Assert.assertEquals(Integer.valueOf(8), tp.getStartMonth());
331
        Assert.assertEquals(Integer.valueOf(24), tp.getStartDay());
332
        Assert.assertEquals(Integer.valueOf(1988), tp.getEndYear());
333
        Assert.assertEquals(Integer.valueOf(10), tp.getEndMonth());
334
        Assert.assertEquals(Integer.valueOf(14), tp.getEndDay());
335

  
336
        // Case 2: full start date, end given as a year only (en dash in the input);
        // end month and end day are expected to be null and toString() to equal the input.
        strDate = "24 Aug 1957–1988";
337
        tp = TimePeriodParser.parseString(strDate);
338
        assertNotNull(tp);
339
        Assert.assertEquals(strDate, tp.toString());
340
        Assert.assertEquals("1957–1988", tp.getYear());
341
        Assert.assertEquals(Integer.valueOf(1957), tp.getStartYear());
342
        Assert.assertEquals(Integer.valueOf(8), tp.getStartMonth());
343
        Assert.assertEquals(Integer.valueOf(24), tp.getStartDay());
344
        Assert.assertEquals(Integer.valueOf(1988), tp.getEndYear());
345
        Assert.assertNull(tp.getEndMonth());
346
        Assert.assertNull(tp.getEndDay());
347

  
348
        // Case 3: start given as a year only, full end date;
        // start month and start day are expected to be null.
        strDate = "1957–14 Oct 1988";
349
        tp = TimePeriodParser.parseString(strDate);
350
        assertNotNull(tp);
351
        Assert.assertEquals("1957–14 Oct 1988", tp.toString());
352
        Assert.assertEquals("1957–1988", tp.getYear());
353
        Assert.assertEquals(Integer.valueOf(1957), tp.getStartYear());
354
        Assert.assertNull(tp.getStartMonth());
355
        Assert.assertNull(tp.getStartDay());
356
        Assert.assertEquals(Integer.valueOf(1988), tp.getEndYear());
357
        Assert.assertEquals(Integer.valueOf(10), tp.getEndMonth());
358
        Assert.assertEquals(Integer.valueOf(14), tp.getEndDay());
359
    }
360

  
361
    @Test
323 362
    public void testParseVerbatim() {
324 363
        String strDate = "1957 [\"1958\"]";
325 364
        VerbatimTimePeriod tp = TimePeriodParser.parseStringVerbatim(strDate);

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)