Skip to content

Commit a5ac454

Browse files
authored
Merge pull request #911 from boriel-basic/fix/fix_peephole_opt_recipe_parser
refact: improve typing and improve parsing in opt parser
2 parents a43fb17 + 36e9b5b commit a5ac454

3 files changed

Lines changed: 190 additions & 49 deletions

File tree

src/arch/z80/peephole/evaluator.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ class FN(StrEnum):
7575
FN.CTEST: lambda x: memcell.MemCell(x, 1).condition_flag, # condition test, if any. E.g. retz returns 'z'
7676
FN.NEEDS: lambda x: memcell.MemCell(x[0], 1).needs(x[1]),
7777
FN.FLAGVAL: lambda x: helpers.new_tmp_val(),
78-
FN.OP1: lambda x: (x.strip().replace(",", " ", 1).split() + [""])[1],
79-
FN.OP2: lambda x: (x.strip().replace(",", " ", 1).split() + ["", ""])[2],
78+
FN.OP1: lambda x: (x.strip().replace(",", " ", 1).split() + [""])[1], # 1st Operand of an instruction or ""
79+
FN.OP2: lambda x: (x.strip().replace(",", " ", 1).split() + ["", ""])[2], # 2nd Operand of an instruction or ""
8080
}
8181

8282
# Binary operators
@@ -228,14 +228,22 @@ def eval(self, vars_: dict[str, Any] | None = None) -> str | Evaluator | list[An
228228
return vars_[val]
229229

230230
if len(self.expression) == 2:
231-
oper = FN(self.expression[0])
232-
assert oper in UNARY
231+
try:
232+
oper = FN(self.expression[0])
233+
assert oper in UNARY
234+
except (AssertionError, ValueError):
235+
raise ValueError(f"Invalid unary operator '{self.expression[0]}'")
236+
233237
operand = self.expression[1].eval(vars_)
234238
return self.normalize(UNARY[oper](operand))
235239

236240
if len(self.expression) == 3 and self.expression[1] != FN.OP_COMMA:
237-
oper = FN(self.expression[1])
238-
assert oper in BINARY
241+
try:
242+
oper = FN(self.expression[1])
243+
assert oper in BINARY
244+
except (AssertionError, ValueError):
245+
raise ValueError(f"Invalid binary operator '{self.expression[1]}'")
246+
239247
# Do lazy evaluation
240248
left_ = lambda: self.expression[0].eval(vars_)
241249
right_ = lambda: self.expression[2].eval(vars_)

src/arch/z80/peephole/parser.py

Lines changed: 72 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
O_LEVEL = "OLEVEL"
2626
O_FLAG = "OFLAG"
2727

28-
# Operators : priority (lower number -> highest priority)
28+
# Operators : precedence (lower number -> highest priority)
2929
IF_OPERATORS: Final[MappingProxyType[FN, int]] = MappingProxyType(
3030
{
3131
FN.OP_NMUL: 3,
@@ -54,6 +54,10 @@
5454
REQUIRED = (REG_REPLACE, REG_WITH, O_LEVEL, O_FLAG)
5555

5656

57+
class PeepholeParserSyntaxError(SyntaxError):
58+
pass
59+
60+
5761
def simplify_expr(expr: list[Any]) -> list[Any]:
5862
"""Simplifies ("unnest") a list, removing redundant brackets.
5963
i.e. [[x, [[y]]] becomes [x, [y]]
@@ -79,23 +83,35 @@ class DefineLine(NamedTuple):
7983
expr: Evaluator
8084

8185

82-
def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
83-
"""Given a line from within a IF region (i.e. $1 == "af'")
84-
returns it as a list of tokens ['$1', '==', "af'"]
85-
"""
86-
stack: list[TreeType] = []
87-
expr: TreeType = []
88-
paren = 0
89-
error_ = False
86+
class Tokenizer:
87+
def __init__(self, source: str, lineno: int) -> None:
88+
self.source = source
89+
self.lineno = lineno
9090

91-
while not error_ and if_line:
92-
if_line = if_line.strip()
93-
if not if_line:
94-
break
95-
qq = RE_IFPARSE.match(if_line)
91+
def get_token(self) -> str:
92+
"""Returns next token, or "" as EOL"""
93+
tok = self.lookahead()
94+
self.source = self.source[len(tok) :]
95+
return tok
96+
97+
def get_next_token(self) -> str:
98+
if self.has_finished():
99+
raise PeepholeParserSyntaxError("Unexpected EOL")
100+
101+
return self.get_token()
102+
103+
def has_finished(self) -> bool:
104+
return self.source == ""
105+
106+
def lookahead(self) -> str:
107+
"""Returns next token, or "" as EOL"""
108+
self.source = self.source.strip()
109+
if self.has_finished():
110+
return ""
111+
112+
qq = RE_IFPARSE.match(self.source)
96113
if not qq:
97-
error_ = True
98-
break
114+
raise PeepholeParserSyntaxError(f"Syntax error in line {self.lineno}: {self.source}")
99115

100116
tok = qq.group()
101117
if not RE_ID.match(tok):
@@ -104,7 +120,25 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
104120
tok = tok[: len(oper)]
105121
break
106122

107-
if_line = if_line[len(tok) :]
123+
return tok
124+
125+
126+
def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
127+
"""Given a line from within a IF region (i.e. $1 == "af'")
128+
returns it as a list of tokens ['$1', '==', "af'"]
129+
"""
130+
stack: list[TreeType] = []
131+
expr: TreeType = []
132+
paren = 0
133+
error_ = False
134+
tokenizer = Tokenizer(if_line, lineno)
135+
136+
while not tokenizer.has_finished():
137+
try:
138+
tok = tokenizer.get_token()
139+
except PeepholeParserSyntaxError as e:
140+
errmsg.warning(lineno, str(e))
141+
return None
108142

109143
if tok == "(":
110144
paren += 1
@@ -139,8 +173,8 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
139173
errmsg.warning(lineno, f"Unexpected {tok} in list")
140174
return None
141175

142-
while len(expr) == 2 and isinstance(expr[-2], str):
143-
op: str | TreeType = expr[-2]
176+
while len(expr) == 2 and isinstance(expr[0], str):
177+
op: str = expr[0]
144178
if op in UNARY:
145179
stack[-1].append(expr)
146180
expr = stack.pop()
@@ -162,7 +196,11 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
162196

163197
expr = [expr]
164198

165-
if not error_ and paren:
199+
if error_:
200+
errmsg.warning(lineno, "syntax error in IF section")
201+
return None
202+
203+
if paren:
166204
errmsg.warning(lineno, "unclosed parenthesis in IF section")
167205
return None
168206

@@ -181,11 +219,20 @@ def parse_ifline(if_line: str, lineno: int) -> TreeType | None:
181219
errmsg.warning(lineno, f"unexpected binary operator '{op}'")
182220
return None
183221

184-
if error_:
185-
errmsg.warning(lineno, "syntax error in IF section")
222+
expr = simplify_expr(expr)
223+
if len(expr) == 2 and isinstance(expr[-1], str) and expr[-1] in BINARY:
224+
errmsg.warning(lineno, f"Unexpected binary operator '{expr[-1]}'")
225+
return None
226+
227+
if len(expr) == 3 and (expr[1] not in BINARY or expr[1] == FN.OP_COMMA):
228+
errmsg.warning(lineno, f"Unexpected binary operator '{expr[1]}'")
229+
return None
230+
231+
if len(expr) > 3:
232+
errmsg.warning(lineno, "Lists not allowed in IF section condition. Missing operator")
186233
return None
187234

188-
return simplify_expr(expr)
235+
return expr
189236

190237

191238
def parse_define_line(sourceline: SourceLine) -> tuple[str | None, TreeType | None]:
@@ -329,12 +376,12 @@ def check_entry(key: str) -> bool:
329376
return result
330377

331378

332-
def parse_file(fname: str):
379+
def parse_file(fname: str) -> dict[str, str | int | list[str | list]] | None:
333380
"""Opens and parse a file given by filename"""
334381
tmp = global_.FILENAME
335382
global_.FILENAME = fname # set filename so it shows up in error/warning msgs
336383

337-
with open(fname, "rt") as f:
384+
with open(fname, "rt", encoding="utf-8") as f:
338385
result = parse_str(f.read())
339386

340387
global_.FILENAME = tmp # restores original filename

tests/arch/zx48k/peephole/test_parser.py

Lines changed: 104 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,22 @@ def test_parse_string(self):
3232

3333
self.maxDiff = None
3434
self.assertIsInstance(result, dict)
35-
self.assertDictEqual(
36-
result,
37-
{
38-
"DEFINE": [],
39-
"IF": [
40-
[
41-
[["$1", "==", "af'"], "&&", ["$1", "==", 'Hello ""World""']],
42-
"&&",
43-
[["$1", "==", "(hl)"], "||", ["IS_INDIR", ["$1"]]],
44-
],
45-
"||",
46-
["$1", "==", "aa"],
35+
assert result == {
36+
"DEFINE": [],
37+
"IF": [
38+
[
39+
[["$1", "==", "af'"], "&&", ["$1", "==", 'Hello ""World""']],
40+
"&&",
41+
[["$1", "==", "(hl)"], "||", ["IS_INDIR", ["$1"]]],
4742
],
48-
"OFLAG": 15,
49-
"OLEVEL": 1,
50-
"REPLACE": ["push $1", "pop $1"],
51-
"WITH": [],
52-
},
53-
)
43+
"||",
44+
["$1", "==", "aa"],
45+
],
46+
"OFLAG": 15,
47+
"OLEVEL": 1,
48+
"REPLACE": ["push $1", "pop $1"],
49+
"WITH": [],
50+
}
5451

5552
def test_parse_call(self):
5653
result = parser.parse_str(
@@ -398,3 +395,92 @@ def test_parse_if_must_start_in_a_new_line(self):
398395
"""
399396
)
400397
assert result is None
398+
399+
def test_parse_with_ending_binary_error(self):
400+
result = parser.parse_str(
401+
"""
402+
;; Remove the boolean normalization if it's done after calling
403+
;; certain routines that return the bool result already normalized.
404+
405+
;; The sequence
406+
;; sub 1
407+
;; sbc a, a
408+
;; inc a
409+
;; can be removed
410+
411+
OLEVEL: 1
412+
OFLAG: 20
413+
414+
REPLACE {{
415+
$1
416+
}}
417+
418+
WITH {{
419+
}}
420+
421+
IF {{
422+
$1 == "ld a, 0" ||
423+
}}
424+
"""
425+
)
426+
assert result is None
427+
428+
def test_parse_with_comma_error(self):
429+
result = parser.parse_str(
430+
"""
431+
;; Remove the boolean normalization if it's done after calling
432+
;; certain routines that return the bool result already normalized.
433+
434+
;; The sequence
435+
;; sub 1
436+
;; sbc a, a
437+
;; inc a
438+
;; can be removed
439+
440+
OLEVEL: 1
441+
OFLAG: 20
442+
443+
REPLACE {{
444+
$1
445+
}}
446+
447+
WITH {{
448+
}}
449+
450+
IF {{
451+
$1 == "ld a, 0" ,
452+
$1 == "ld a, 0"
453+
}}
454+
"""
455+
)
456+
assert result is None
457+
458+
def test_parse_with_nested_comma_error(self):
459+
result = parser.parse_str(
460+
"""
461+
;; Remove the boolean normalization if it's done after calling
462+
;; certain routines that return the bool result already normalized.
463+
464+
;; The sequence
465+
;; sub 1
466+
;; sbc a, a
467+
;; inc a
468+
;; can be removed
469+
470+
OLEVEL: 1
471+
OFLAG: 20
472+
473+
REPLACE {{
474+
$1
475+
}}
476+
477+
WITH {{
478+
}}
479+
480+
IF {{
481+
($1 == "ld a, 0" ,
482+
$1 == "ld a, 0")
483+
}}
484+
"""
485+
)
486+
assert result is None

0 commit comments

Comments
 (0)