Skip to content

Commit c023070

Browse files
committed
eli-579 amending token parser to parse out function details
1 parent 9b4f10d commit c023070

3 files changed

Lines changed: 173 additions & 10 deletions

File tree

src/eligibility_signposting_api/services/processors/token_parser.py

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,30 @@ class ParsedToken:
1313
Example: "PERSON" or "TARGET"
1414
attribute_name : str
1515
Example: "POSTCODE" or "RSV"
16-
attribute_value : int
16+
attribute_value : str | None
1717
Example: "LAST_SUCCESSFUL_DATE" if attribute_level is TARGET
18-
format : str
18+
format : str | None
1919
Example: "%d %B %Y" if DATE formatting is used
20+
function_name : str | None
21+
Example: "ADD_DAYS" for derived value functions
22+
function_args : str | None
23+
Example: "91" for ADD_DAYS(91)
2024
"""
2125

2226
attribute_level: str
2327
attribute_name: str
2428
attribute_value: str | None
2529
format: str | None
30+
function_name: str | None = None
31+
function_args: str | None = None
2632

2733

2834
class TokenParser:
2935
MIN_TOKEN_PARTS = 2
36+
# Pattern for function calls like ADD_DAYS(91) - captures function name and args
37+
FUNCTION_PATTERN = re.compile(r":([A-Z_]+)\(([^()]*)\)", re.IGNORECASE)
38+
# Pattern for DATE format - special case as it's already supported
39+
DATE_PATTERN = re.compile(r":DATE\(([^()]*)\)", re.IGNORECASE)
3040

3141
@staticmethod
3242
def parse(token: str) -> ParsedToken:
@@ -35,8 +45,15 @@ def parse(token: str) -> ParsedToken:
3545
Strip the surrounding [[ ]]
3646
Check for empty body after stripping, e.g., '[[]]'
3747
Check for empty parts created by leading/trailing dots or tokens with no dot
38-
Check if the name contains a date format
48+
Check if the name contains a date format or function call
3949
Return a ParsedToken object
50+
51+
Supported formats:
52+
- [[PERSON.AGE]] - Simple person attribute
53+
- [[TARGET.COVID.LAST_SUCCESSFUL_DATE]] - Target attribute
54+
- [[PERSON.DATE_OF_BIRTH:DATE(%d %B %Y)]] - With date formatting
55+
- [[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS(91)]] - Derived value function
56+
- [[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS(91):DATE(%d %B %Y)]] - Function with date format
4057
"""
4158

4259
token_body = token[2:-2]
@@ -53,14 +70,20 @@ def parse(token: str) -> ParsedToken:
5370
token_level = token_parts[0].upper()
5471
token_name = token_parts[-1]
5572

56-
format_match = re.search(r":DATE\(([^()]*)\)", token_name, re.IGNORECASE)
57-
if not format_match and len(token_name.split(":")) > 1:
58-
message = "Invalid token format."
59-
raise ValueError(message)
73+
# Extract function call (e.g., ADD_DAYS(91))
74+
function_name, function_args = TokenParser._extract_function(token_name)
6075

76+
# Extract date format
77+
format_match = TokenParser.DATE_PATTERN.search(token_name)
6178
format_str = format_match.group(1) if format_match else None
6279

63-
last_part = re.sub(r":DATE\([^)]*\)", "", token_name, flags=re.IGNORECASE)
80+
# Validate format - if there's a colon but no valid pattern, it's invalid
81+
if not format_match and not function_name and len(token_name.split(":")) > 1:
82+
message = "Invalid token format."
83+
raise ValueError(message)
84+
85+
# Remove function and date patterns to get the clean attribute name
86+
last_part = TokenParser._clean_attribute_name(token_name)
6487

6588
if len(token_parts) == TokenParser.MIN_TOKEN_PARTS:
6689
name = last_part.upper()
@@ -69,4 +92,42 @@ def parse(token: str) -> ParsedToken:
6992
name = token_parts[1].upper()
7093
value = last_part.upper()
7194

72-
return ParsedToken(attribute_level=token_level, attribute_name=name, attribute_value=value, format=format_str)
95+
return ParsedToken(
96+
attribute_level=token_level,
97+
attribute_name=name,
98+
attribute_value=value,
99+
format=format_str,
100+
function_name=function_name,
101+
function_args=function_args,
102+
)
103+
104+
@staticmethod
def _extract_function(token_name: str) -> tuple[str | None, str | None]:
    """Find the first non-DATE function call in a token name.

    Args:
        token_name: The last part of the token (e.g., 'NEXT_DOSE_DUE:ADD_DAYS(91)')

    Returns:
        ``(function_name, function_args)`` where the name is upper-cased and the
        args are the raw text between the parentheses, or ``(None, None)`` when
        the token name carries no function call other than DATE.
    """
    # Lazily walk every ':NAME(args)' occurrence in left-to-right order.
    calls = (
        (found.group(1).upper(), found.group(2))
        for found in TokenParser.FUNCTION_PATTERN.finditer(token_name)
    )
    # DATE(...) is a formatting directive handled elsewhere, so it is skipped here.
    return next((call for call in calls if call[0] != "DATE"), (None, None))
120+
121+
@staticmethod
def _clean_attribute_name(token_name: str) -> str:
    """Remove function calls and date formatting from token name.

    Args:
        token_name: The raw token name with potential modifiers
            (e.g., 'NEXT_DOSE_DUE:ADD_DAYS(91):DATE(%d %B %Y)')

    Returns:
        Clean attribute name (e.g., 'NEXT_DOSE_DUE')

    Raises:
        ValueError: If a colon is still present after every well-formed
            ``:NAME(args)`` modifier has been removed, e.g.
            'NEXT_DOSE_DUE:ADD_DAYS(91):DATE'.
    """
    # Remove the date format first, then any other function calls
    without_date = TokenParser.DATE_PATTERN.sub("", token_name)
    cleaned = TokenParser.FUNCTION_PATTERN.sub("", without_date)

    # A leftover colon means a malformed modifier sat next to a valid one.
    # Without this check the leftover text would become part of the attribute name.
    if ":" in cleaned:
        message = "Invalid token format."
        raise ValueError(message)

    return cleaned

tests/unit/services/processors/test_token_parser.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,17 @@ def test_parse_invalid_tokens_raises_error(self, token):
4747
"[[PERSON.DATE_OF_BIRTH:DATE(]]",
4848
"[[PERSON.DATE_OF_BIRTH:DATE)]]",
4949
"[[PERSON.DATE_OF_BIRTH:DATE]]",
50-
"[[PERSON.DATE_OF_BIRTH:INVALID_FORMAT(abc)]]",
5150
"[[PERSON.DATE_OF_BIRTH:INVALID_FORMAT(a (b) c)]]",
5251
"[[PERSON.DATE_OF_BIRTH:DATE(a (b) c)]]",
5352
],
5453
)
5554
def test_parse_invalid_token_format_raises_error(self, token):
5655
with pytest.raises(ValueError, match="Invalid token format."):
5756
TokenParser.parse(token)
57+
58+
def test_parse_function_token_valid(self):
    """A ``:NAME(args)`` modifier other than DATE is parsed as a function call."""
    # Previously rejected as an invalid format; custom functions are now accepted.
    result = TokenParser.parse("[[PERSON.DATE_OF_BIRTH:SOME_FUNC(abc)]]")

    assert (result.function_name, result.function_args) == ("SOME_FUNC", "abc")
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""Tests for TokenParser with derived value function support."""
2+
3+
from dataclasses import dataclass
4+
5+
import pytest
6+
7+
from eligibility_signposting_api.services.processors.token_parser import TokenParser
8+
9+
10+
@dataclass
11+
class ExpectedTokenResult:
12+
"""Expected result for a parsed token."""
13+
14+
level: str
15+
name: str
16+
value: str | None
17+
function: str | None
18+
args: str | None
19+
date_format: str | None
20+
21+
22+
class TestTokenParserWithFunctions:
23+
"""Tests for parsing tokens with function calls like ADD_DAYS."""
24+
25+
@pytest.mark.parametrize(
26+
("token", "expected"),
27+
[
28+
# Basic ADD_DAYS function
29+
(
30+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS(91)]]",
31+
ExpectedTokenResult("TARGET", "COVID", "NEXT_DOSE_DUE", "ADD_DAYS", "91", None),
32+
),
33+
# ADD_DAYS with date format
34+
(
35+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS(91):DATE(%d %B %Y)]]",
36+
ExpectedTokenResult("TARGET", "COVID", "NEXT_DOSE_DUE", "ADD_DAYS", "91", "%d %B %Y"),
37+
),
38+
# Different vaccine type
39+
(
40+
"[[TARGET.RSV.NEXT_DOSE_DUE:ADD_DAYS(365)]]",
41+
ExpectedTokenResult("TARGET", "RSV", "NEXT_DOSE_DUE", "ADD_DAYS", "365", None),
42+
),
43+
# Case insensitive function name
44+
(
45+
"[[TARGET.COVID.NEXT_DOSE_DUE:add_days(91)]]",
46+
ExpectedTokenResult("TARGET", "COVID", "NEXT_DOSE_DUE", "ADD_DAYS", "91", None),
47+
),
48+
# Empty args (use default)
49+
(
50+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS()]]",
51+
ExpectedTokenResult("TARGET", "COVID", "NEXT_DOSE_DUE", "ADD_DAYS", "", None),
52+
),
53+
# Person level with function (hypothetical future use)
54+
(
55+
"[[PERSON.SOME_DATE:ADD_DAYS(30)]]",
56+
ExpectedTokenResult("PERSON", "SOME_DATE", None, "ADD_DAYS", "30", None),
57+
),
58+
],
59+
)
60+
def test_parse_tokens_with_functions(self, token: str, expected: ExpectedTokenResult):
61+
"""Test parsing tokens with function calls."""
62+
parsed_token = TokenParser.parse(token)
63+
64+
assert parsed_token.attribute_level == expected.level
65+
assert parsed_token.attribute_name == expected.name
66+
assert parsed_token.attribute_value == expected.value
67+
assert parsed_token.function_name == expected.function
68+
assert parsed_token.function_args == expected.args
69+
assert parsed_token.format == expected.date_format
70+
71+
def test_parse_without_function_has_none_function_fields(self):
72+
"""Test that tokens without functions have None for function fields."""
73+
parsed = TokenParser.parse("[[TARGET.COVID.LAST_SUCCESSFUL_DATE]]")
74+
75+
assert parsed.function_name is None
76+
assert parsed.function_args is None
77+
78+
def test_parse_date_format_not_treated_as_function(self):
79+
"""Test that DATE format is not treated as a derived function."""
80+
parsed = TokenParser.parse("[[PERSON.DATE_OF_BIRTH:DATE(%d %B %Y)]]")
81+
82+
assert parsed.function_name is None
83+
assert parsed.format == "%d %B %Y"
84+
85+
@pytest.mark.parametrize(
86+
"token",
87+
[
88+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS]]", # Missing parentheses
89+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS(]]", # Unclosed parenthesis
90+
"[[TARGET.COVID.NEXT_DOSE_DUE:ADD_DAYS)]]", # No opening parenthesis
91+
],
92+
)
93+
def test_parse_invalid_function_format_raises_error(self, token):
94+
"""Test that malformed function calls raise errors."""
95+
with pytest.raises(ValueError, match="Invalid token format"):
96+
TokenParser.parse(token)

0 commit comments

Comments
 (0)