Skip to content

Commit 9033f02

Browse files
authored
Merge pull request #834 from akx/improve-date-parse
Improve date/time parsing
2 parents 89686fc + 8a5e4bd commit 9033f02

2 files changed

Lines changed: 73 additions & 25 deletions

File tree

babel/dates.py

Lines changed: 42 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1138,7 +1138,11 @@ def get_period_id(time, tzinfo=None, type=None, locale=LC_TIME):
11381138
return "pm"
11391139

11401140

1141-
def parse_date(string, locale=LC_TIME):
1141+
class ParseError(ValueError):
1142+
pass
1143+
1144+
1145+
def parse_date(string, locale=LC_TIME, format='medium'):
11421146
"""Parse a date from a string.
11431147
11441148
This function uses the date format for the locale as a hint to determine
@@ -1151,14 +1155,19 @@ def parse_date(string, locale=LC_TIME):
11511155
11521156
:param string: the string containing the date
11531157
:param locale: a `Locale` object or a locale identifier
1158+
:param format: the format to use (see ``get_date_format``)
11541159
"""
1160+
numbers = re.findall(r'(\d+)', string)
1161+
if not numbers:
1162+
raise ParseError("No numbers were found in input")
1163+
11551164
# TODO: try ISO format first?
1156-
format = get_date_format(locale=locale).pattern.lower()
1157-
year_idx = format.index('y')
1158-
month_idx = format.index('m')
1165+
format_str = get_date_format(format=format, locale=locale).pattern.lower()
1166+
year_idx = format_str.index('y')
1167+
month_idx = format_str.index('m')
11591168
if month_idx < 0:
1160-
month_idx = format.index('l')
1161-
day_idx = format.index('d')
1169+
month_idx = format_str.index('l')
1170+
day_idx = format_str.index('d')
11621171

11631172
indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]
11641173
indexes.sort()
@@ -1167,7 +1176,6 @@ def parse_date(string, locale=LC_TIME):
11671176
# FIXME: this currently only supports numbers, but should also support month
11681177
# names, both in the requested locale and in English
11691178

1170-
numbers = re.findall(r'(\d+)', string)
11711179
year = numbers[indexes['Y']]
11721180
if len(year) == 2:
11731181
year = 2000 + int(year)
@@ -1180,7 +1188,7 @@ def parse_date(string, locale=LC_TIME):
11801188
return date(year, month, day)
11811189

11821190

1183-
def parse_time(string, locale=LC_TIME):
1191+
def parse_time(string, locale=LC_TIME, format='medium'):
11841192
"""Parse a time from a string.
11851193
11861194
This function uses the time format for the locale as a hint to determine
@@ -1191,29 +1199,42 @@ def parse_time(string, locale=LC_TIME):
11911199
11921200
:param string: the string containing the time
11931201
:param locale: a `Locale` object or a locale identifier
1202+
:param format: the format to use (see ``get_time_format``)
11941203
:return: the parsed time
11951204
:rtype: `time`
11961205
"""
1206+
numbers = re.findall(r'(\d+)', string)
1207+
if not numbers:
1208+
raise ParseError("No numbers were found in input")
1209+
11971210
# TODO: try ISO format first?
1198-
format = get_time_format(locale=locale).pattern.lower()
1199-
hour_idx = format.index('h')
1211+
format_str = get_time_format(format=format, locale=locale).pattern.lower()
1212+
hour_idx = format_str.index('h')
12001213
if hour_idx < 0:
1201-
hour_idx = format.index('k')
1202-
min_idx = format.index('m')
1203-
sec_idx = format.index('s')
1214+
hour_idx = format_str.index('k')
1215+
min_idx = format_str.index('m')
1216+
sec_idx = format_str.index('s')
12041217

12051218
indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]
12061219
indexes.sort()
12071220
indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
12081221

1209-
# FIXME: support 12 hour clock, and 0-based hour specification
1210-
# and seconds should be optional, maybe minutes too
1211-
# oh, and time-zones, of course
1212-
1213-
numbers = re.findall(r'(\d+)', string)
1214-
hour = int(numbers[indexes['H']])
1215-
minute = int(numbers[indexes['M']])
1216-
second = int(numbers[indexes['S']])
1222+
# TODO: support time zones
1223+
1224+
# Check if the format specifies a period to be used;
1225+
# if it does, look for 'pm' to figure out an offset.
1226+
hour_offset = 0
1227+
if 'a' in format_str:
1228+
if 'pm' in string.lower():
1229+
hour_offset = 12
1230+
1231+
# Parse up to three numbers from the string.
1232+
minute = second = 0
1233+
hour = int(numbers[indexes['H']]) + hour_offset
1234+
if len(numbers) > 1:
1235+
minute = int(numbers[indexes['M']])
1236+
if len(numbers) > 2:
1237+
second = int(numbers[indexes['S']])
12171238
return time(hour, minute, second)
12181239

12191240

tests/test_dates.py

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -775,10 +775,37 @@ def test_format_timedelta():
775775
def test_parse_date():
776776
assert dates.parse_date('4/1/04', locale='en_US') == date(2004, 4, 1)
777777
assert dates.parse_date('01.04.2004', locale='de_DE') == date(2004, 4, 1)
778-
779-
780-
def test_parse_time():
781-
assert dates.parse_time('15:30:00', locale='en_US') == time(15, 30)
778+
assert dates.parse_date('2004-04-01', locale='sv_SE', format='short') == date(2004, 4, 1)
779+
780+
781+
@pytest.mark.parametrize('input, expected', [
782+
# base case, fully qualified time
783+
('15:30:00', time(15, 30)),
784+
# test digits
785+
('15:30', time(15, 30)),
786+
('3:30', time(3, 30)),
787+
('00:30', time(0, 30)),
788+
# test am parsing
789+
('03:30 am', time(3, 30)),
790+
('3:30:21 am', time(3, 30, 21)),
791+
('3:30 am', time(3, 30)),
792+
# test pm parsing
793+
('03:30 pm', time(15, 30)),
794+
('03:30 pM', time(15, 30)),
795+
('03:30 Pm', time(15, 30)),
796+
('03:30 PM', time(15, 30)),
797+
# test hour-only parsing
798+
('4 pm', time(16, 0)),
799+
])
800+
def test_parse_time(input, expected):
801+
assert dates.parse_time(input, locale='en_US') == expected
802+
803+
804+
@pytest.mark.parametrize('case', ['', 'a', 'aaa'])
805+
@pytest.mark.parametrize('func', [dates.parse_date, dates.parse_time])
806+
def test_parse_errors(case, func):
807+
with pytest.raises(dates.ParseError):
808+
func(case, locale='en_US')
782809

783810

784811
def test_datetime_format_get_week_number():

0 commit comments

Comments
 (0)