Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion misc/dump-ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def dump(fname: str, python_version: tuple[int, int], quiet: bool = False) -> No
options.python_version = python_version
with open(fname, "rb") as f:
s = f.read()
tree = parse(s, fname, None, errors=Errors(options), options=options)
tree = parse(s, fname, None, errors=Errors(options), options=options, file_exists=True)
if not quiet:
print(tree)

Expand Down
287 changes: 210 additions & 77 deletions mypy/build.py

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions mypy/checkstrformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
MemberExpr,
MypyFile,
NameExpr,
Node,
StarExpr,
StrExpr,
TempNode,
Expand Down Expand Up @@ -582,8 +581,13 @@ def apply_field_accessors(

temp_errors = Errors(self.chk.options)
dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :]
temp_ast: Node = parse(
dummy, fnam="<format>", module=None, options=self.chk.options, errors=temp_errors
temp_ast, _ = parse(
dummy,
fnam="<format>",
module=None,
options=self.chk.options,
errors=temp_errors,
file_exists=False,
)
if temp_errors.is_errors():
self.msg.fail(
Expand Down
29 changes: 16 additions & 13 deletions mypy/nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from __future__ import annotations

import os
from typing import Any, Final, cast
import time
from typing import Final, cast

import ast_serialize # type: ignore[import-untyped, import-not-found, unused-ignore]
from librt.internal import (
Expand Down Expand Up @@ -101,6 +102,7 @@
OpExpr,
OverloadedFuncDef,
OverloadPart,
ParseError,
PassStmt,
RaiseStmt,
RefExpr,
Expand Down Expand Up @@ -168,17 +170,11 @@
class State:
def __init__(self, options: Options) -> None:
self.options = options
self.errors: list[dict[str, Any]] = []
self.errors: list[ParseError] = []
self.num_funcs = 0

def add_error(
self,
message: str,
line: int,
column: int,
*,
blocker: bool = False,
code: str | None = None,
self, message: str, line: int, column: int, *, blocker: bool = False, code: str
) -> None:
"""Report an error at a specific location.

Expand All @@ -196,7 +192,7 @@ def add_error(

def native_parse(
filename: str, options: Options, skip_function_bodies: bool = False, imports_only: bool = False
) -> tuple[MypyFile, list[dict[str, Any]], TypeIgnores]:
) -> tuple[MypyFile, list[ParseError], TypeIgnores]:
"""Parse a Python file using the native Rust-based parser.

Uses the ast_serialize Rust extension to parse Python code and deserialize
Expand All @@ -214,7 +210,7 @@ def native_parse(
Returns:
A tuple containing:
- MypyFile: The parsed AST as a mypy AST node
- list[dict[str, Any]]: List of parse errors and deserialization errors
- list[ParseError]: List of parse errors and deserialization errors
- TypeIgnores: List of (line_number, ignored_codes) tuples for type: ignore comments
"""
# If the path is a directory, return empty AST (matching fastparse behavior)
Expand Down Expand Up @@ -272,7 +268,14 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]:

def parse_to_binary_ast(
filename: str, options: Options, skip_function_bodies: bool = False
) -> tuple[bytes, list[dict[str, Any]], TypeIgnores, bytes, bool, bool]:
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool]:
# This is a horrible hack to work around a mypyc bug where imported
# module may be not ready in a thread sometimes.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: maybe add a timeout so that we give up after say 60s and raise an exception?

t0 = time.time()
while ast_serialize is None:
time.sleep(0.0001) # type: ignore[unreachable]
if time.time() - t0 > 10.0:
raise ImportError("Cannot import ast_serialize")
ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse(
filename,
skip_function_bodies=skip_function_bodies,
Expand All @@ -284,7 +287,7 @@ def parse_to_binary_ast(
)
return (
ast_bytes,
cast("list[dict[str, Any]]", errors),
errors,
ignores,
import_bytes,
ast_data["is_partial_package"],
Expand Down
45 changes: 41 additions & 4 deletions mypy/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
Final,
Optional,
TypeAlias as _TypeAlias,
TypedDict,
TypeGuard,
TypeVar,
Union,
cast,
)
from typing_extensions import NotRequired

from librt.internal import (
extract_symbol,
Expand All @@ -39,7 +41,9 @@
LIST_GEN,
LIST_STR,
LITERAL_COMPLEX,
LITERAL_FALSE,
LITERAL_NONE,
LITERAL_TRUE,
ReadBuffer,
Tag,
WriteBuffer,
Expand Down Expand Up @@ -313,6 +317,39 @@ def read(cls, data: ReadBuffer) -> SymbolNode:
Definition: _TypeAlias = tuple[str, "SymbolTableNode", Optional["TypeInfo"]]


class ParseError(TypedDict):
    """A parse or deserialization error produced while parsing a source file.

    Matches the dict shape emitted by the native parser, so instances can be
    serialized/deserialized via write_parse_error/read_parse_error.
    """

    # Source location of the error.
    line: int
    column: int
    # Raw error message; consumers may normalize capitalization before display.
    message: str
    # Whether the error blocks further processing. Absent means "use the
    # consumer's default" (reporting code treats missing as blocker=True).
    blocker: NotRequired[bool]
    # Error-code name; absent means fall back to the generic syntax code.
    code: NotRequired[str]


def write_parse_error(data: WriteBuffer, err: ParseError) -> None:
    """Serialize a ParseError into a binary buffer.

    Layout: line, column, message, optional blocker bool, optional code
    string. Must stay in sync with read_parse_error().
    """
    write_int(data, err["line"])
    write_int(data, err["column"])
    write_str(data, err["message"])
    # "blocker" is NotRequired: encode absence as a LITERAL_NONE tag.
    # NOTE(review): assumes write_bool emits a LITERAL_TRUE/LITERAL_FALSE tag
    # that read_parse_error's read_tag() call can distinguish — confirm.
    if (blocker := err.get("blocker")) is not None:
        write_bool(data, blocker)
    else:
        write_tag(data, LITERAL_NONE)
    # "code" is NotRequired: write_str_opt handles the None (absent) case.
    write_str_opt(data, err.get("code"))


def read_parse_error(data: ReadBuffer) -> ParseError:
    """Deserialize a ParseError from a binary buffer.

    Inverse of write_parse_error(); reads fields in the same order.
    Optional keys ("blocker", "code") are omitted from the result when the
    serialized value was absent, preserving the NotRequired semantics.
    """
    err: ParseError = {"line": read_int(data), "column": read_int(data), "message": read_str(data)}
    # The blocker flag was written either as a bool (true/false tag) or as a
    # LITERAL_NONE tag when it was absent.
    tag = read_tag(data)
    if tag == LITERAL_TRUE:
        err["blocker"] = True
    elif tag == LITERAL_FALSE:
        err["blocker"] = False
    else:
        # Absent blocker: leave the key out of the TypedDict.
        assert tag == LITERAL_NONE
    if (code := read_str_opt(data)) is not None:
        err["code"] = code
    return err


class FileRawData:
"""Raw (binary) data representing parsed, but not deserialized file."""

Expand All @@ -327,7 +364,7 @@ class FileRawData:

defs: bytes
imports: bytes
raw_errors: list[dict[str, Any]] # TODO: switch to more precise type here.
raw_errors: list[ParseError]
ignored_lines: dict[int, list[str]]
is_partial_stub_package: bool
uses_template_strings: bool
Expand All @@ -336,7 +373,7 @@ def __init__(
self,
defs: bytes,
imports: bytes,
raw_errors: list[dict[str, Any]],
raw_errors: list[ParseError],
ignored_lines: dict[int, list[str]],
is_partial_stub_package: bool,
uses_template_strings: bool,
Expand All @@ -354,7 +391,7 @@ def write(self, data: WriteBuffer) -> None:
write_tag(data, LIST_GEN)
write_int_bare(data, len(self.raw_errors))
for err in self.raw_errors:
write_json(data, err)
write_parse_error(data, err)
write_tag(data, DICT_INT_GEN)
write_int_bare(data, len(self.ignored_lines))
for line, codes in self.ignored_lines.items():
Expand All @@ -368,7 +405,7 @@ def read(cls, data: ReadBuffer) -> FileRawData:
defs = read_bytes(data)
imports = read_bytes(data)
assert read_tag(data) == LIST_GEN
raw_errors = [read_json(data) for _ in range(read_int_bare(data))]
raw_errors = [read_parse_error(data) for _ in range(read_int_bare(data))]
assert read_tag(data) == DICT_INT_GEN
ignored_lines = {read_int(data): read_str_list(data) for _ in range(read_int_bare(data))}
return FileRawData(
Expand Down
61 changes: 22 additions & 39 deletions mypy/parse.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import annotations

import os
import re

from librt.internal import ReadBuffer

from mypy import errorcodes as codes
from mypy.cache import read_int
from mypy.errors import Errors
from mypy.nodes import FileRawData, MypyFile
from mypy.nodes import FileRawData, MypyFile, ParseError
from mypy.options import Options


Expand All @@ -18,9 +17,9 @@ def parse(
module: str | None,
errors: Errors,
options: Options,
raise_on_error: bool = False,
file_exists: bool,
imports_only: bool = False,
) -> MypyFile:
) -> tuple[MypyFile, list[ParseError]]:
"""Parse a source file, without doing any semantic analysis.

Return the parse tree. If errors is not provided, raise ParseError
Expand All @@ -31,14 +30,12 @@ def parse(
if options.native_parser:
# Native parser only works with actual files on disk
# Fall back to fastparse for in-memory source or non-existent files
if os.path.exists(fnam):
if file_exists:
import mypy.nativeparse

ignore_errors = options.ignore_errors or fnam in errors.ignored_files
# If errors are ignored, we can drop many function bodies to speed up type checking.
strip_function_bodies = ignore_errors and not options.preserve_asts

errors.set_file(fnam, module, options=options)
tree, parse_errors, type_ignores = mypy.nativeparse.native_parse(
fnam,
options,
Expand All @@ -51,26 +48,7 @@ def parse(
tree.is_stub = fnam.endswith(".pyi")
# Note: tree.imports is populated directly by native_parse with deserialized
# import metadata, so we don't need to collect imports via AST traversal

# Report parse errors
for error in parse_errors:
message = error["message"]
# Standardize error message by capitalizing the first word
message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), message)
# Respect blocker status from error, default to True for syntax errors
is_blocker = error.get("blocker", True)
error_code = error.get("code")
if error_code is None:
error_code = codes.SYNTAX
else:
# Fallback to [syntax] for backwards compatibility.
error_code = codes.error_codes.get(error_code) or codes.SYNTAX
errors.report(
error["line"], error["column"], message, blocker=is_blocker, code=error_code
)
if raise_on_error and errors.is_errors():
errors.raise_error()
return tree
return tree, parse_errors
# Fall through to fastparse for non-existent files

assert not imports_only
Expand All @@ -79,9 +57,7 @@ def parse(
import mypy.fastparse

tree = mypy.fastparse.parse(source, fnam=fnam, module=module, errors=errors, options=options)
if raise_on_error and errors.is_errors():
errors.raise_error()
return tree
return tree, []


def load_from_raw(
Expand Down Expand Up @@ -112,14 +88,21 @@ def load_from_raw(
all_errors = raw_data.raw_errors + state.errors
errors.set_file(fnam, module, options=options)
for error in all_errors:
message = error["message"]
message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), message)
is_blocker = error.get("blocker", True)
error_code = error.get("code")
if error_code is None:
error_code = codes.SYNTAX
else:
error_code = codes.error_codes.get(error_code) or codes.SYNTAX
# Note we never raise in this function, so it should not be called in coordinator.
errors.report(error["line"], error["column"], message, blocker=is_blocker, code=error_code)
report_parse_error(error, errors)
return tree


def report_parse_error(error: ParseError, errors: Errors) -> None:
    """Report a single ParseError through the given Errors instance.

    Normalizes the message capitalization, defaults missing "blocker" to
    True (syntax errors block by default), and maps the optional error-code
    name onto a registered error code, falling back to [syntax].
    """
    # Standardize the message by upper-casing its first word.
    message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), error["message"])
    code_name = error.get("code")
    if code_name is None:
        error_code = codes.SYNTAX
    else:
        # Unknown code names fall back to [syntax] for backwards compatibility.
        error_code = codes.error_codes.get(code_name) or codes.SYNTAX
    errors.report(
        error["line"],
        error["column"],
        message,
        blocker=error.get("blocker", True),
        code=error_code,
    )
28 changes: 16 additions & 12 deletions mypy/semanal_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,17 +463,18 @@ def apply_class_plugin_hooks(graph: Graph, scc: list[str], errors: Errors) -> No
state = graph[module]
tree = state.tree
assert tree
for _, node, _ in tree.local_definitions():
if isinstance(node.node, TypeInfo):
if not apply_hooks_to_class(
state.manager.semantic_analyzer,
module,
node.node,
state.options,
tree,
errors,
):
incomplete = True
with state.wrap_context():
for _, node, _ in tree.local_definitions():
if isinstance(node.node, TypeInfo):
if not apply_hooks_to_class(
state.manager.semantic_analyzer,
module,
node.node,
state.options,
tree,
errors,
):
incomplete = True


def apply_hooks_to_class(
Expand Down Expand Up @@ -524,7 +525,10 @@ def calculate_class_properties(graph: Graph, scc: list[str], errors: Errors) ->
assert tree
for _, node, _ in tree.local_definitions():
if isinstance(node.node, TypeInfo):
with state.manager.semantic_analyzer.file_context(tree, state.options, node.node):
with (
state.wrap_context(),
state.manager.semantic_analyzer.file_context(tree, state.options, node.node),
):
calculate_class_abstract_status(node.node, tree.is_stub, errors)
check_protocol_status(node.node, errors)
calculate_class_vars(node.node)
Expand Down
11 changes: 9 additions & 2 deletions mypy/stubgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,10 +1744,17 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None:
data = f.read()
source = mypy.util.decode_python_encoding(data)
errors = Errors(mypy_options)
mod.ast = mypy.parse.parse(
source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options
mod.ast, errs = mypy.parse.parse(
source,
fnam=mod.path,
module=mod.module,
errors=errors,
options=mypy_options,
file_exists=True,
)
mod.ast._fullname = mod.module
for err in errs:
mypy.parse.report_parse_error(err, errors)
if errors.is_blockers():
# Syntax error!
for m in errors.new_messages():
Expand Down
Loading
Loading