From 4c1b2b2293a42dc4929bd93c45a7f6926a20caef Mon Sep 17 00:00:00 2001 From: swananan Date: Mon, 4 May 2026 00:09:53 +0800 Subject: [PATCH] refactor: consolidate dwarf read planning Move DWARF variable access to PC-context read plans, making ghostscope-dwarf own scope selection, access paths, availability, and lowering requirements before compiler codegen. Route compiler variable reads through planned locations and remove raw evaluation adapters from the compiler-facing path. Reject unsafe fallbacks for shadowed variables, ambiguous globals, and value-backed aggregate member offsets. Preserve optimized-out print metadata, keep link-time addresses rebased, and cover optimized variables, member access, and address trace failures. Refs #148. --- bins/dwarf-tool/src/main.rs | 25 +- docs/architecture.md | 17 +- docs/comparison.md | 14 +- docs/zh/architecture.md | 16 +- docs/zh/comparison.md | 14 +- e2e-tests/tests/dwarf_index_regressions.rs | 26 + .../tests/entry_value_recovery_execution.rs | 76 +- e2e-tests/tests/member_pointer_compilation.rs | 354 ++- .../optimized_inline_call_value_execution.rs | 21 +- e2e-tests/tests/optimized_inline_execution.rs | 12 +- e2e-tests/tests/script_execution.rs | 81 +- ghostscope-compiler/README.md | 9 +- ghostscope-compiler/src/ebpf/codegen.rs | 121 +- ghostscope-compiler/src/ebpf/context.rs | 7 +- ghostscope-compiler/src/ebpf/dwarf_bridge.rs | 2438 ++++++----------- ghostscope-compiler/src/ebpf/expression.rs | 156 +- ghostscope-compiler/src/lib.rs | 63 +- ghostscope-compiler/src/script/compiler.rs | 81 +- ghostscope-dwarf/README.md | 14 +- .../src/{analyzer.rs => analyzer/mod.rs} | 554 ++-- ghostscope-dwarf/src/analyzer/plan_global.rs | 212 ++ ghostscope-dwarf/src/analyzer/plan_pc.rs | 375 +++ ghostscope-dwarf/src/analyzer/type_lookup.rs | 191 ++ ghostscope-dwarf/src/core/diagnostic.rs | 187 ++ ghostscope-dwarf/src/core/evaluation.rs | 20 +- ghostscope-dwarf/src/core/ids.rs | 50 + ghostscope-dwarf/src/core/mod.rs | 7 +- ghostscope-dwarf/src/core/plan.rs | 242 ++ ghostscope-dwarf/src/core/types.rs | 18 - ghostscope-dwarf/src/dwarf_expr/lower.rs | 305 ++- ghostscope-dwarf/src/dwarf_expr/ops.rs | 114 + ghostscope-dwarf/src/index/block_index.rs | 14 +- ghostscope-dwarf/src/index/cfi_index.rs | 263 +- ghostscope-dwarf/src/lib.rs | 39 +- .../src/objfile/access_planner.rs | 767 ------ ghostscope-dwarf/src/objfile/globals.rs | 74 +- ghostscope-dwarf/src/objfile/loaded.rs | 20 + ghostscope-dwarf/src/objfile/mod.rs | 1 - ghostscope-dwarf/src/objfile/variables.rs | 1142 ++++---- .../src/parser/detailed_parser.rs | 85 +- ghostscope-dwarf/src/parser/fast_parser.rs | 2 - ghostscope-dwarf/src/parser/mod.rs | 4 +- ghostscope-dwarf/src/semantics/mod.rs | 12 +- ghostscope-dwarf/src/semantics/pc_context.rs | 63 + ghostscope-dwarf/src/semantics/types.rs | 24 - ghostscope-dwarf/src/semantics/unwind_plan.rs | 120 + .../src/semantics/variable_plan.rs | 1234 +++++++++ ghostscope/src/cli/script_output.rs | 62 + ghostscope/src/cli/script_runtime.rs | 5 + ghostscope/src/config/runtime.rs | 19 +- ghostscope/src/core/session.rs | 5 + ghostscope/src/script/compiler.rs | 7 +- ghostscope/src/tui/info_handlers.rs | 4 +- 53 files changed, 6284 insertions(+), 3502 deletions(-) rename ghostscope-dwarf/src/{analyzer.rs => analyzer/mod.rs} (74%) create mode 100644 ghostscope-dwarf/src/analyzer/plan_global.rs create mode 100644 ghostscope-dwarf/src/analyzer/plan_pc.rs create mode 100644 ghostscope-dwarf/src/analyzer/type_lookup.rs create mode 100644 ghostscope-dwarf/src/core/diagnostic.rs create mode 100644 ghostscope-dwarf/src/core/ids.rs create mode 100644 ghostscope-dwarf/src/core/plan.rs delete mode 100644 ghostscope-dwarf/src/objfile/access_planner.rs create mode 100644 ghostscope-dwarf/src/semantics/pc_context.rs create mode 100644 ghostscope-dwarf/src/semantics/unwind_plan.rs create mode 100644 ghostscope-dwarf/src/semantics/variable_plan.rs diff --git a/bins/dwarf-tool/src/main.rs b/bins/dwarf-tool/src/main.rs index 411c4758..15cdf19d 100644 --- a/bins/dwarf-tool/src/main.rs +++ b/bins/dwarf-tool/src/main.rs @@ -4,9 +4,9 @@ use anyhow::Result; use clap::{Parser, Subcommand}; -use ghostscope_dwarf::core::SectionType; use ghostscope_dwarf::{ AddressQueryResult, DwarfAnalyzer, FunctionQueryResult, ModuleLoadingEvent, ModuleLoadingStats, + SectionType, }; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -629,7 +629,7 @@ async fn analyze_source_location( fn iter_address_query_variables<'a>( address: &'a AddressQueryResult, -) -> impl Iterator + 'a { +) -> impl Iterator + 'a { address.parameters.iter().chain(address.variables.iter()) } @@ -641,11 +641,11 @@ fn total_variables_in_query_results(addresses: &[AddressQueryResult]) -> usize { addresses.iter().map(query_address_variable_count).sum() } -fn variable_info_from_query(variable: &ghostscope_dwarf::VariableWithEvaluation) -> VariableInfo { +fn variable_info_from_query(variable: &ghostscope_dwarf::VisibleVariable) -> VariableInfo { VariableInfo { name: variable.name.clone(), type_name: variable.type_name.clone(), - location: format!("{}", variable.evaluation_result), + location: format!("{}", variable.location), is_parameter: variable.is_parameter, scope_depth: variable.scope_depth as u32, } @@ -750,7 +750,7 @@ async fn analyze_function( format!("{} (no DWARF info)", var.type_name) }; - println!("{}: {} = {}", var.name, type_str, var.evaluation_result); + println!("{}: {} = {}", var.name, type_str, var.location); } } else { println!(" Address: 0x{:x}", address.address); @@ -796,10 +796,7 @@ async fn analyze_module_address( ); } else if options.quiet() { for var in iter_address_query_variables(&address_info) { - println!( - "{}: {} = {}", - var.name, var.type_name, var.evaluation_result - ); + println!("{}: {} = {}", var.name, var.type_name, var.location); } } else { println!("\n=== {module_path} @ 0x{address:x} ==="); @@ -1175,7 +1172,7 @@ fn percentile_nearest_rank(sorted_samples_ms: &[f64], percentile: f64) -> f64 { } fn print_variables_with_style<'a>( - variables: impl IntoIterator, + variables: impl IntoIterator, options: &Commands, ) { for (i, var) in variables.into_iter().enumerate() { @@ -1194,7 +1191,7 @@ fn print_variables_with_style<'a>( println!(" Scope Depth: {}", var.scope_depth); println!(" Is Parameter: {}", var.is_parameter); println!(" Is Artificial: {}", var.is_artificial); - println!(" Location: {}", var.evaluation_result); + println!(" Location: {}", var.location); println!(); } else { let param_marker = if var.is_parameter { " (param)" } else { "" }; @@ -1213,14 +1210,14 @@ fn print_variables_with_style<'a>( println!( " ├─ {}: {} = {}{}{}", - var.name, type_str, var.evaluation_result, param_marker, artificial_marker + var.name, type_str, var.location, param_marker, artificial_marker ); } } } fn print_variables_with_indent<'a>( - variables: impl IntoIterator, + variables: impl IntoIterator, indent: &str, ) { for var in variables { @@ -1240,7 +1237,7 @@ fn print_variables_with_indent<'a>( println!( "{}├─ {}: {} = {}{}{}", - indent, var.name, type_str, var.evaluation_result, param_marker, artificial_marker + indent, var.name, type_str, var.location, param_marker, artificial_marker ); } } diff --git a/docs/architecture.md b/docs/architecture.md index d260730c..f4bd040d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -57,7 +57,7 @@ GhostScope uses Cargo workspace for modular design: |-------|---------| | **ghostscope** | Main binary and runtime coordinator - orchestrates all components via async event loop | | **ghostscope-compiler** | Script compilation pipeline - transforms user scripts into verified eBPF bytecode via LLVM | -| **ghostscope-dwarf** | Debug information analyzer - provides cross-module symbol resolution and type information | +| **ghostscope-dwarf** | PC-context DWARF semantic engine - resolves source locations, visible variables, type layouts, address mappings, and compiler read plans | | **ghostscope-loader** | eBPF program lifecycle manager - handles uprobe attachment and ring buffer management via Aya | | **ghostscope-ui** | Terminal user interface - implements interactive TUI with TEA (The Elm Architecture) pattern | | **ghostscope-protocol** | Communication protocol - defines message format for eBPF-userspace data exchange | @@ -88,9 +88,10 @@ GhostScope uses Cargo workspace for modular design: **Key feature**: Progressive loading with callbacks for UI progress updates. -### 3. DWARF Analyzer +### 3. DWARF Semantic Engine -**Role**: High-performance multi-module debug information system. +**Role**: High-performance multi-module debug information system and +PC-context semantic planner. **Core Optimizations**: @@ -115,6 +116,12 @@ GhostScope uses Cargo workspace for modular design: - Virtual address to file offset conversion - Runtime address mapping for process-specific traces +5. **PC-Context Read Planning** + - Resolves locals, parameters, globals, and inline scopes at a specific probe PC + - Produces typed read plans for the compiler instead of exposing raw DWARF locations + - Preserves semantic distinctions such as optimized-out values, rebased absolute addresses, and value-backed aggregates + - Reports compile-time diagnostics when a variable is visible but cannot be safely lowered + TODO: Still slow, need to research how GDB optimizes DWARF parsing performance. ### 4. Compilation Pipeline @@ -135,9 +142,9 @@ Multi-stage pipeline with type safety at each level: ┌──────────────────────────────────────────────────────────┐ │ Stage 2: LLVM IR Generation │ │ │ -│ AST + DWARF Info │ +│ AST + PC Context + DWARF Read Plans │ │ ↓ │ -│ Symbol Resolution (variables, types, locations) │ +│ Plan Lowering (variables, types, availability) │ │ ↓ │ │ LLVM IR (type-safe intermediate representation) │ └──────────────────────────────────────────────────────────┘ diff --git a/docs/comparison.md b/docs/comparison.md index 1b86181f..ccea9b92 100644 --- a/docs/comparison.md +++ b/docs/comparison.md @@ -68,17 +68,17 @@ We now ship a reproducible single-thread benchmark for one narrow question: "wha | Aspect | GhostScope | perf probe / perf uprobes | |---|---|---| -| Positioning | Purpose-built userspace tracer with runtime DWARF evaluation, a small DSL, and a TUI/session workflow | Declarative probe-definition frontend plus the broader perf recording and reporting pipeline | +| Positioning | Purpose-built userspace tracer with PC-context DWARF planning, a small DSL, and a TUI/session workflow | Declarative probe-definition frontend plus the broader perf recording and reporting pipeline | | Programmability and safety model | eBPF-backed collection logic with programmable filtering and formatting; flexibility is constrained by the verifier | Narrower, more declarative capability surface: define probe points and fetchargs, but not an eBPF-style "run custom logic on each hit" programming model | | Source-level frontend | Function, source-line, and instruction-oriented tracing are core workflows | Strong native support for functions, source lines, locals, and inline-related probe discovery inside `perf probe` | -| Variable access style | Runtime DWARF evaluation for locals, parameters, and globals; typed rendering in the tracer UI | Declarative fetchargs for locals, parameters, registers, symbols, arrays, and return values | +| Variable access style | Compile/load-time DWARF read planning for locals, parameters, and globals, followed by eBPF runtime reads and typed rendering | Declarative fetchargs for locals, parameters, registers, symbols, arrays, and return values | | Inline and discovery workflow | Good source-driven attachment, but within GhostScope's tracer model | Mature discovery workflow for lines, functions, and inline-related probe search such as `--line`, `--vars`, and `--no-inlines` | | What happens after a hit | Structured data can be filtered, sampled, and shaped before delivery to userspace | Mostly fixed event-field extraction, then hand off to the perf recording and reporting toolchain | | Output and consumption | RingBuf or PerfEventArray to a custom realtime reader/TUI | Common path is `perf probe` -> `perf record` -> `perf.data` -> `perf report` or `perf script` | | Best at | Production-oriented live userspace diagnosis with structured output and a dedicated runtime workflow | Quick one-off probes and reuse of the existing perf ecosystem | | Tradeoff | More opinionated; not meant to be the general perf toolkit | Less programmable than eBPF-based tracers and less centered on custom realtime processing | -Choose GhostScope when you want a purpose-built online tracer with runtime DWARF semantics, programmable filtering, and a friendlier live diagnosis workflow. Choose perf when you want to quickly place a function, source-line, or local-variable probe and stay inside the perf ecosystem. +Choose GhostScope when you want a purpose-built online tracer with PC-context DWARF semantics, programmable filtering, and a friendlier live diagnosis workflow. Choose perf when you want to quickly place a function, source-line, or local-variable probe and stay inside the perf ecosystem. Background: a practical shorthand is `perf probe` = more fixed-semantics and ready-to-use, not "zero configurability"; GhostScope's eBPF-backed tracer model trades that simplicity for more programmable hit handling and a richer live workflow. @@ -87,10 +87,10 @@ Background: a practical shorthand is `perf probe` = more fixed-semantics and rea | Aspect | GhostScope | bpftrace | |---|---|---| | Positioning | DWARF-aware userspace observation; restores source-level semantics | General-purpose eBPF dynamic tracer; event observation and aggregation | -| DWARF usage | Evaluates DWARF expressions at runtime; reads params, locals, and globals | Parses args and structs; not centered on runtime evaluation of location expressions | +| DWARF usage | Plans variable reads from DWARF at compile/load time, then emits eBPF reads for params, locals, and globals | Parses args and structs; not centered on PC-context variable read planning | | Attachment granularity and symbols | Line-table-driven source-line and instruction attachment, plus function-oriented tracing | Entry/return, in-function offsets, absolute locations, and event probes; no built-in line-to-address workflow | | Observable data | Supports locals, parameters, and globals; renders values with real types | Strong for arguments, structs, and event streams; less focused on recovering arbitrary live userspace state | -| ASLR impact | Runtime DWARF computation naturally adapts to ASLR and PIE | `uaddr()`-style global reads become awkward or unavailable under ASLR and PIE | +| ASLR impact | DWARF read plans preserve rebasing requirements for PIE, shared libraries, and absolute-address values | `uaddr()`-style global reads become awkward or unavailable under ASLR and PIE | | Interaction experience | TUI-friendly, observe without interruption | Script-style output and aggregation; less interactive | | Best at | Recovering real userspace state from live code paths | Correlating many event sources quickly | | Tradeoff | Narrower scope | Less focused on source-level userspace diagnosis | @@ -105,8 +105,8 @@ Background: one motivation for GhostScope was that newer bpftrace versions no lo |---|---|---| | Position and scope | DWARF-aware userspace observation aimed at production printf-style debugging with an interactive workflow | Broad tracing framework with kernel and userspace coverage, including an eBPF backend | | Source line and statement probes | Supported; line-level attachment is a core path | Supported; statement probes can be resolved and attached | -| Variable access (params, locals, globals) | Supported. Evaluate DWARF at runtime with gimli; render by real types; naturally ASLR and PIE friendly | Supported. DWARF location expressions are lowered through SystemTap's pipeline into eBPF-compatible logic, with verifier and stack constraints | -| DWARF expression handling | Evaluate DWARF in userspace and collect values via eBPF programs | Translate DWARF operations into internal representations and lower them into eBPF instruction sequences | +| Variable access (params, locals, globals) | Supported. Build PC-context read plans with gimli-backed DWARF data; render by real types; naturally ASLR and PIE friendly | Supported. DWARF location expressions are lowered through SystemTap's pipeline into eBPF-compatible logic, with verifier and stack constraints | +| DWARF expression handling | Convert DWARF locations into semantic read plans and lower supported plans into eBPF runtime reads | Translate DWARF operations into internal representations and lower them into eBPF instruction sequences | | Stack unwinding (CFI) | Not supported yet; planned via `.eh_frame` unwinding | Not supported in the eBPF backend | | Event transport and formatting | RingBuf (on newer kernels) or PerfEventArray; configurable pages and event size; built-in dump helpers such as `{:x.N}`, `{:s.N}`, and `{:p}` | PERF_EVENT_ARRAY plus userspace formatting/interpreter flow; formatting and string handling are more constrained | | BTF, CO-RE, linkage | Aya ecosystem, prefer RingBuf; not centered on BTF or CO-RE | No BTF or CO-RE focus; minimal libbpf-style backend | diff --git a/docs/zh/architecture.md b/docs/zh/architecture.md index 3a46311d..de26c57d 100644 --- a/docs/zh/architecture.md +++ b/docs/zh/architecture.md @@ -57,7 +57,7 @@ GhostScope 使用 Cargo workspace 进行模块化设计: |-------|------| | **ghostscope** | 主程序和运行时协调器 - 通过异步事件循环协调所有组件 | | **ghostscope-compiler** | 脚本编译流水线 - 通过 LLVM 将用户脚本转换为经过验证的 eBPF 字节码 | -| **ghostscope-dwarf** | 调试信息分析器 - 提供跨模块符号解析和类型信息 | +| **ghostscope-dwarf** | PC 上下文 DWARF 语义引擎 - 解析源码位置、可见变量、类型布局、地址映射和编译器读取计划 | | **ghostscope-loader** | eBPF 程序生命周期管理器 - 通过 Aya 处理 uprobe 附加和 ring buffer 管理 | | **ghostscope-ui** | 终端用户界面 - 实现基于 TEA (The Elm Architecture) 模式的交互式 TUI | | **ghostscope-protocol** | 通信协议 - 定义 eBPF 与用户态数据交换的消息格式 | @@ -88,9 +88,9 @@ GhostScope 使用 Cargo workspace 进行模块化设计: **关键特性**:渐进式加载,带有 UI 进度更新回调。 -### 3. DWARF 分析器 +### 3. DWARF 语义引擎 -**角色**:高性能多模块调试信息系统。 +**角色**:高性能多模块调试信息系统,以及基于 PC 上下文的语义规划器。 **核心优化**: @@ -115,6 +115,12 @@ GhostScope 使用 Cargo workspace 进行模块化设计: - 虚拟地址到文件偏移的转换 - 针对特定进程追踪的运行时地址映射 +5. **PC 上下文读取计划** + - 在指定 probe PC 上解析局部变量、参数、全局变量和 inline 作用域 + - 向编译器输出带类型的读取计划,而不是暴露原始 DWARF 位置 + - 保留 optimized-out、需要重定位的绝对地址、value-backed 聚合等语义差异 + - 当变量可见但无法安全 lower 时,给出编译期诊断 + TODO: 但是依然很慢,需要继续研究 GDB 是怎么提升解析 DWARF 性能的。 ### 4. 编译流水线 @@ -135,9 +141,9 @@ TODO: 但是依然很慢,需要继续研究 GDB 是怎么提升解析 DWARF ┌──────────────────────────────────────────────────────────┐ │ 阶段 2:LLVM IR 生成 │ │ │ -│ AST + DWARF 信息 │ +│ AST + PC 上下文 + DWARF 读取计划 │ │ ↓ │ -│ 符号解析(变量、类型、位置) │ +│ 计划 Lowering(变量、类型、可用性) │ │ ↓ │ │ LLVM IR(类型安全的中间表示) │ └──────────────────────────────────────────────────────────┘ diff --git a/docs/zh/comparison.md b/docs/zh/comparison.md index e1ee3b03..467b6880 100644 --- a/docs/zh/comparison.md +++ b/docs/zh/comparison.md @@ -68,17 +68,17 @@ GhostScope 的目标很明确:**针对带有 DWARF 调试信息的活跃进程 | 维度 | GhostScope | perf probe / perf uprobes | |---|---|---| -| 定位 | 面向用户态源码语义恢复的专用 tracer,带运行时 DWARF 求值、小型 DSL 和 TUI / session 工作流 | 偏声明式的 probe 定义前端,再接上更完整的 perf 记录和分析链路 | +| 定位 | 面向用户态源码语义恢复的专用 tracer,带 PC 上下文 DWARF 规划、小型 DSL 和 TUI / session 工作流 | 偏声明式的 probe 定义前端,再接上更完整的 perf 记录和分析链路 | | 可编程性与安全模型 | 基于 eBPF 的采集逻辑,可做可编程过滤和格式化;灵活性由 verifier 约束 | 能力面更窄、也更偏声明式:定义 probe 点和 fetchargs,但不是 eBPF 那种“每次命中都运行自定义逻辑”的编程模型 | | 源码级前端 | 函数、源码行、指令级 tracing 都是核心路径 | `perf probe` 原生就很擅长函数、源码行、局部变量以及 inline 相关的 probe 发现 | -| 变量获取方式 | 运行时求值 DWARF,读取局部变量、参数和全局变量,并在 tracer UI 中按真实类型渲染 | 更偏声明式 fetchargs,可直接写局部变量、参数、寄存器、符号、数组和返回值 | +| 变量获取方式 | 编译/加载期基于 DWARF 生成读取计划,再由 eBPF 在运行时读取局部变量、参数和全局变量,并按真实类型渲染 | 更偏声明式 fetchargs,可直接写局部变量、参数、寄存器、符号、数组和返回值 | | inline 与发现体验 | 有不错的源码驱动附着体验,但工作流仍围绕 GhostScope 自己的 tracer 模型 | 在线、函数和 inline 相关搜索上更成熟,例如 `--line`、`--vars`、`--no-inlines` 这类入口 | | 命中后的处理 | 可以先把结构化数据做过滤、采样、整形,再送到用户态 | 主要还是固定事件字段提取,然后交给 perf 的记录和分析工具链 | | 输出与消费链路 | RingBuf 或 PerfEventArray,接自定义实时 reader / TUI | 常见链路是 `perf probe` -> `perf record` -> `perf.data` -> `perf report` 或 `perf script` | | 最擅长 | 面向生产在线定位的用户态诊断,结构化输出和专用运行时工作流更完整 | 快速打一针式 probe,以及复用现有 perf 生态 | | 代价 | 更偏有主见的专用工具,不是通用 perf 工具箱 | 可编程性弱于 eBPF tracer,也不那么适合自定义实时处理 | -如果你想要的是一个面向线上诊断的专用 tracer,强调运行时 DWARF 语义、可编程过滤以及更友好的实时定位工作流,GhostScope 更合适。如果你只是想快速在函数、源码行或局部变量上落一个 probe,并继续沿用 perf 生态,perf 会更顺手。 +如果你想要的是一个面向线上诊断的专用 tracer,强调 PC 上下文 DWARF 语义、可编程过滤以及更友好的实时定位工作流,GhostScope 更合适。如果你只是想快速在函数、源码行或局部变量上落一个 probe,并继续沿用 perf 生态,perf 会更顺手。 一句话概括:`perf probe` 更像“语义更固定、拿来就用”的前端,而不是“完全不可配置”;GhostScope 这类基于 eBPF 的 tracer 则用更高的命中后处理可编程性,换来更丰富的实时工作流。 @@ -87,10 +87,10 @@ GhostScope 的目标很明确:**针对带有 DWARF 调试信息的活跃进程 | 维度 | GhostScope | bpftrace | |---|---|---| | 定位 | “DWARF 感知”的用户态观测,偏源码语义还原 | 通用 eBPF 动态追踪器,偏事件统计和观测 | -| DWARF 使用 | 运行时求值 DWARF 表达式,读参、局部变量和全局变量 | 擅长参数、结构体和事件流,但不以运行时求值 location expr 为核心 | +| DWARF 使用 | 编译/加载期根据 DWARF 生成变量读取计划,再由 eBPF 读取参数、局部变量和全局变量 | 擅长参数、结构体和事件流,但不以 PC 上下文变量读取规划为核心 | | 附着粒度与符号 | 行表驱动源码行、指令级附着,也支持函数级 tracing | 入口、返回、函数内偏移、绝对位置和事件探针;没有内建的源码行到地址工作流 | | 可观测数据 | 支持局部变量、参数和全局变量,并能按真实类型渲染值 | 很适合参数、结构体和事件聚合,但不强调任意用户态实时状态还原 | -| ASLR 影响 | 运行时按 DWARF 计算,天然适配 ASLR 和 PIE | `uaddr()` 这一类全局变量读取方式在 ASLR 和 PIE 下会更别扭,甚至不可用 | +| ASLR 影响 | DWARF 读取计划会保留 PIE、共享库和绝对地址值所需的重定位语义 | `uaddr()` 这一类全局变量读取方式在 ASLR 和 PIE 下会更别扭,甚至不可用 | | 交互体验 | TUI 友好界面,可以不中断观测 | 更偏脚本输出和聚合,交互性较弱 | | 最擅长 | 从真实代码路径恢复用户态状态 | 快速关联多类事件源 | | 代价 | 范围更聚焦 | 对源码级用户态诊断没那么聚焦 | @@ -105,8 +105,8 @@ GhostScope 的目标很明确:**针对带有 DWARF 调试信息的活跃进程 |---|---|---| | 定位与范围 | “DWARF 感知”的用户态观测,面向生产 printf 调试和交互式工作流 | 更宽的追踪框架,覆盖内核和用户态,也有 eBPF 后端 | | 源码行/语句级设点 | 支持,行级附着是核心路径 | 支持,statement probe 可以解析后附着 | -| 变量访问(参/局/全) | 支持。运行时用 gimli 求值 DWARF,按真实类型渲染,天然适配 ASLR 和 PIE | 支持。DWARF 位置表达式会经过 SystemTap 的处理链路降为 eBPF 可执行逻辑,但要受验证器和栈限制 | -| DWARF 表达式处理 | 直接在用户态求值 DWARF,再通过 eBPF 程序取值 | 把 DWARF 操作翻译成内部表示,再继续降成 eBPF 指令序列 | +| 变量访问(参/局/全) | 支持。基于 gimli 读取 DWARF,并生成 PC 上下文读取计划;按真实类型渲染,天然适配 ASLR 和 PIE | 支持。DWARF 位置表达式会经过 SystemTap 的处理链路降为 eBPF 可执行逻辑,但要受验证器和栈限制 | +| DWARF 表达式处理 | 将 DWARF 位置转换成语义读取计划,并把支持的计划 lower 成 eBPF 运行时读取 | 把 DWARF 操作翻译成内部表示,再继续降成 eBPF 指令序列 | | 栈回溯(CFI) | 还不支持,计划通过 `.eh_frame` 支持 | eBPF 后端暂不支持 | | 事件传输/格式化 | 新内核优先 RingBuf,也支持 PerfEventArray;页数和事件大小可配;内置 `{:x.N}`、`{:s.N}`、`{:p}` 等 dump helper | 更偏 PERF_EVENT_ARRAY + 用户态解释/格式化流程,格式和字符串能力更受约束 | | BTF/CO-RE/链接 | Aya 生态,优先 RingBuf;不以 BTF/CO-RE 为核心 | 不以 BTF/CO-RE 为核心,更接近最小 libbpf 风格后端 | diff --git a/e2e-tests/tests/dwarf_index_regressions.rs b/e2e-tests/tests/dwarf_index_regressions.rs index 0cce0994..5610649e 100644 --- a/e2e-tests/tests/dwarf_index_regressions.rs +++ b/e2e-tests/tests/dwarf_index_regressions.rs @@ -612,6 +612,32 @@ async fn test_inline_callsite_clang_dwarf5_resolves_debug_addr_entry_pc() -> any !inline_addrs.is_empty(), "No DWARF addresses found for inline_callsite_program.c:{INLINE_TRACE_LINE}" ); + let ctx = analyzer.resolve_pc(&inline_addrs[0])?; + assert_eq!( + ctx.is_inline, + Some(true), + "expected inline PC context for {INLINE_TRACE_LINE}: {ctx:?}" + ); + assert!( + !ctx.inline_chain.is_empty(), + "expected inline chain for {INLINE_TRACE_LINE}: {ctx:?}" + ); + assert!( + ctx.inline_chain + .iter() + .any(|frame| frame.context.is_some() && frame.function_name.as_deref() == Some("add3")), + "expected add3 inline frame with context id: {ctx:?}" + ); + assert!( + ctx.inline_chain.iter().any(|frame| { + frame.function_name.as_deref() == Some("add3") + && frame + .call_site + .as_ref() + .is_some_and(|call_site| call_site.line_number > 0) + }), + "expected add3 inline frame with call-site line info: {ctx:?}" + ); let target = spawn_inline_callsite_program(binary_path).await?; let query_result: anyhow::Result<()> = async { let pid_analyzer = ghostscope_dwarf::DwarfAnalyzer::from_pid(target.host_pid()).await?; diff --git a/e2e-tests/tests/entry_value_recovery_execution.rs b/e2e-tests/tests/entry_value_recovery_execution.rs index 0af81ac1..fc515822 100644 --- a/e2e-tests/tests/entry_value_recovery_execution.rs +++ b/e2e-tests/tests/entry_value_recovery_execution.rs @@ -7,7 +7,7 @@ use common::{ targets::{TargetHandle, TargetLauncher}, FixtureCompiler, FIXTURES, }; -use ghostscope_dwarf::{ComputeStep, MemoryAccessSize}; +use ghostscope_dwarf::{CfaRulePlan, ComputeStep, MemoryAccessSize, RegisterRecoveryPlan}; use gimli::constants; use gimli::write::{ Address, AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec, @@ -724,9 +724,15 @@ async fn test_recover_caller_frame_exposes_pc_and_callee_saved_steps() -> anyhow "No DWARF addresses found for {FIXTURE_SOURCE}:{POST_CALL_TRACE_LINE}" ); - let recovery = analyzer + let recovery_by_address = analyzer .recover_caller_frame(&addrs[0], &[3, 16])? .ok_or_else(|| anyhow::anyhow!("no caller-frame recovery returned"))?; + let ctx = analyzer.resolve_pc(&addrs[0])?; + let recovery = analyzer + .recover_caller_frame_for_context(&ctx, &[3, 16])? + .ok_or_else(|| anyhow::anyhow!("no caller-frame recovery returned from PC context"))?; + + assert_eq!(recovery, recovery_by_address); assert_eq!(recovery.return_address_register, 16); assert!( @@ -763,3 +769,69 @@ async fn test_recover_caller_frame_exposes_pc_and_callee_saved_steps() -> anyhow Ok(()) } + +#[tokio::test] +async fn test_compact_unwind_table_exposes_pc_row() -> anyhow::Result<()> { + init(); + if !fixture_compiler_available(FixtureCompiler::ClangDwarf5) { + eprintln!("Skipping compact unwind table test because clang is unavailable"); + return Ok(()); + } + + let binary_path = + FIXTURES.get_test_binary_with_compiler(FIXTURE_NAME, FixtureCompiler::ClangDwarf5)?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path).await?; + let addrs = analyzer.lookup_addresses_by_source_line(FIXTURE_SOURCE, POST_CALL_TRACE_LINE); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for {FIXTURE_SOURCE}:{POST_CALL_TRACE_LINE}" + ); + + let ctx = analyzer.resolve_pc(&addrs[0])?; + let table_by_context = analyzer + .compact_unwind_table_for_context(&ctx)? + .ok_or_else(|| anyhow::anyhow!("no compact unwind table returned from PC context"))?; + let table_by_module = analyzer + .compact_unwind_table_for_module(ctx.module)? + .ok_or_else(|| anyhow::anyhow!("no compact unwind table returned for module"))?; + + assert_eq!(table_by_context, table_by_module); + + let stats = table_by_context.stats(); + assert!(stats.row_count > 0, "compact unwind table is empty"); + assert!( + stats.bpf_supported_rows > 0, + "expected at least one BPF-fast-path unwind row: {stats:?}" + ); + assert!( + table_by_context + .rows + .windows(2) + .all(|pair| (pair[0].pc_start, pair[0].pc_end) <= (pair[1].pc_start, pair[1].pc_end)), + "compact unwind rows should be sorted by PC" + ); + + let row = table_by_context + .row_for_pc(ctx.normalized_pc) + .ok_or_else(|| anyhow::anyhow!("no compact unwind row for PC context"))?; + let row_by_context = analyzer + .compact_unwind_row_for_context(&ctx)? + .ok_or_else(|| anyhow::anyhow!("no direct compact unwind row for PC context"))?; + assert_eq!(&row_by_context, row); + assert_eq!(row.module, ctx.module); + assert_eq!(row.return_address_register, 16); + assert!(matches!( + row.cfa, + CfaRulePlan::RegPlusOffset { .. } | CfaRulePlan::Expression { .. } + )); + assert!( + !matches!( + row.return_address, + RegisterRecoveryPlan::Undefined | RegisterRecoveryPlan::Unsupported { .. } + ), + "return address recovery should be materialized: {:?}", + row.return_address + ); + + Ok(()) +} diff --git a/e2e-tests/tests/member_pointer_compilation.rs b/e2e-tests/tests/member_pointer_compilation.rs index 4c5a76ac..951ad92b 100644 --- a/e2e-tests/tests/member_pointer_compilation.rs +++ b/e2e-tests/tests/member_pointer_compilation.rs @@ -8,6 +8,17 @@ async fn compile_member_pointer_script( script: &str, opt_level: OptimizationLevel, ) -> anyhow::Result { + compile_member_pointer_script_result(script, opt_level) + .await? + .map_err(|e| anyhow::anyhow!("compile_script failed: {e}")) +} + +async fn compile_member_pointer_script_result( + script: &str, + opt_level: OptimizationLevel, +) -> anyhow::Result< + std::result::Result, +> { let binary_path = FIXTURES.get_test_binary_with_opt("member_pointer_program", opt_level)?; let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) .await @@ -17,8 +28,26 @@ async fn compile_member_pointer_script( ..Default::default() }; - ghostscope_compiler::compile_script(script, &analyzer, None, Some(1), &compile_options) - .map_err(|e| anyhow::anyhow!("compile_script failed: {e}")) + Ok(ghostscope_compiler::compile_script( + script, + &analyzer, + None, + Some(1), + &compile_options, + )) +} + +async fn member_pointer_pc(opt_level: OptimizationLevel) -> anyhow::Result { + let binary_path = FIXTURES.get_test_binary_with_opt("member_pointer_program", opt_level)?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) + .await + .map_err(|e| anyhow::anyhow!("failed to load DWARF for member_pointer_program: {e}"))?; + let addrs = analyzer.lookup_addresses_by_source_line("member_pointer_program.c", TRACE_LINE); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for member_pointer_program.c:{TRACE_LINE}" + ); + Ok(addrs[0].address) } #[tokio::test] @@ -38,40 +67,40 @@ async fn test_member_pointer_planner_resolves_o2_chain_accesses() -> anyhow::Res for module_address in &addrs { let key_data = analyzer - .plan_chain_access( + .plan_chain_access_read_plan( module_address, "h", &["key".to_string(), "data".to_string()], ) .map_err(|e| { anyhow::anyhow!( - "plan_chain_access failed for h.key.data at 0x{:x}: {}", + "plan_chain_access_read_plan failed for h.key.data at 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( key_data.is_some(), - "plan_chain_access returned None for h.key.data at 0x{:x}", + "plan_chain_access_read_plan returned None for h.key.data at 0x{:x}", module_address.address ); let header_pos = analyzer - .plan_chain_access( + .plan_chain_access_read_plan( module_address, "r", &["header_in".to_string(), "pos".to_string()], ) .map_err(|e| { anyhow::anyhow!( - "plan_chain_access failed for r.header_in.pos at 0x{:x}: {}", + "plan_chain_access_read_plan failed for r.header_in.pos at 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( header_pos.is_some(), - "plan_chain_access returned None for r.header_in.pos at 0x{:x}", + "plan_chain_access_read_plan returned None for r.header_in.pos at 0x{:x}", module_address.address ); } @@ -79,6 +108,179 @@ async fn test_member_pointer_planner_resolves_o2_chain_accesses() -> anyhow::Res Ok(()) } +#[tokio::test] +async fn test_resolve_pc_context_reports_source_and_function() -> anyhow::Result<()> { + init(); + + let binary_path = + FIXTURES.get_test_binary_with_opt("member_pointer_program", OptimizationLevel::O2)?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) + .await + .map_err(|e| anyhow::anyhow!("failed to load DWARF for member_pointer_program: {e}"))?; + let addrs = analyzer.lookup_addresses_by_source_line("member_pointer_program.c", TRACE_LINE); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for member_pointer_program.c:{TRACE_LINE}" + ); + + let ctx = analyzer.resolve_pc(&addrs[0])?; + assert_eq!(ctx.module, ghostscope_dwarf::ModuleId(0)); + assert_eq!(ctx.pc, addrs[0].address); + assert_eq!(ctx.normalized_pc, addrs[0].address); + assert_eq!(ctx.function_name.as_deref(), Some("trace_member_pointer")); + assert!(ctx.cu.is_some(), "PC context should carry a CU id: {ctx:?}"); + let function = ctx.function.expect("PC context should carry a function id"); + assert_eq!(function.declaration.module, ctx.module); + assert_eq!(function.declaration.cu, ctx.cu.expect("CU id")); + assert_eq!(ctx.is_inline, Some(false)); + assert!( + ctx.inline_chain.is_empty(), + "non-inline trace point should not report inline frames: {ctx:?}" + ); + assert_eq!( + ctx.address_space.module_path.as_deref(), + Some(binary_path.as_path()) + ); + + let line = ctx.line.expect("PC context should include source line"); + assert!( + line.file_path.ends_with("member_pointer_program.c"), + "unexpected source file: {}", + line.file_path + ); + assert!( + (TRACE_LINE..=TRACE_LINE + 1).contains(&line.line_number), + "unexpected source line: {}", + line.line_number + ); + + Ok(()) +} + +#[tokio::test] +async fn test_visible_variables_consumes_pc_context() -> anyhow::Result<()> { + init(); + + let binary_path = + FIXTURES.get_test_binary_with_opt("member_pointer_program", OptimizationLevel::O2)?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) + .await + .map_err(|e| anyhow::anyhow!("failed to load DWARF for member_pointer_program: {e}"))?; + let addrs = analyzer.lookup_addresses_by_source_line("member_pointer_program.c", TRACE_LINE); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for member_pointer_program.c:{TRACE_LINE}" + ); + + let ctx = analyzer.resolve_pc(&addrs[0])?; + let ghostscope_dwarf::VisibleVariablesResult { + variables, + diagnostics, + } = analyzer.visible_variables_with_diagnostics(&ctx)?; + + assert!( + variables.iter().any(|var| { + var.name == "r" + && var.availability.is_available() + && var.dwarf_type.is_some() + && !var.is_artificial + }), + "expected visible available variable 'r'. Variables: {variables:?}" + ); + assert!( + variables.iter().any(|var| { + var.name == "h" + && var.availability.is_available() + && var.dwarf_type.is_some() + && !var.is_artificial + }), + "expected visible available variable 'h'. Variables: {variables:?}" + ); + assert!( + diagnostics + .iter() + .all(|diagnostic| diagnostic.pc == ctx.normalized_pc), + "diagnostics should be tied to the queried PC: {:?}", + diagnostics + ); + + Ok(()) +} + +#[tokio::test] +async fn test_plan_variable_by_name_uses_pc_context() -> anyhow::Result<()> { + init(); + + let binary_path = + FIXTURES.get_test_binary_with_opt("member_pointer_program", OptimizationLevel::O2)?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) + .await + .map_err(|e| anyhow::anyhow!("failed to load DWARF for member_pointer_program: {e}"))?; + let addrs = analyzer.lookup_addresses_by_source_line("member_pointer_program.c", TRACE_LINE); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for member_pointer_program.c:{TRACE_LINE}" + ); + + let ctx = analyzer.resolve_pc(&addrs[0])?; + let plan = analyzer + .plan_variable_by_name(&ctx, "r")? + .ok_or_else(|| anyhow::anyhow!("expected variable read plan for 'r'"))?; + + assert_eq!(plan.name, "r"); + assert!(plan.availability.is_available(), "plan: {plan:?}"); + assert!(plan.dwarf_type.is_some(), "plan: {plan:?}"); + assert!(plan.declaration.is_some(), "plan: {plan:?}"); + assert!(plan.type_id.is_some(), "plan: {plan:?}"); + assert!(!plan.is_artificial, "plan: {plan:?}"); + assert!(analyzer + .plan_variable_by_name(&ctx, "__ghostscope_missing")? + .is_none()); + + let variable_id = ghostscope_dwarf::VariableId { + declaration: plan + .declaration + .expect("plan_variable_by_name should carry declaration id"), + }; + let plan_by_id = analyzer + .plan_variable(&ctx, variable_id)? + .ok_or_else(|| anyhow::anyhow!("expected variable read plan by id for 'r'"))?; + assert_eq!(plan_by_id.name, plan.name); + assert_eq!(plan_by_id.declaration, plan.declaration); + assert_eq!(plan_by_id.type_id, plan.type_id); + + let id_header_pos = analyzer + .plan_variable_access( + &ctx, + variable_id, + &ghostscope_dwarf::VariableAccessPath::fields(["header_in", "pos"]), + )? + .ok_or_else(|| anyhow::anyhow!("expected id-based access plan for 'r.header_in.pos'"))?; + assert_eq!(id_header_pos.name, "r.header_in.pos"); + assert!(id_header_pos.availability.is_available()); + assert!(id_header_pos.dwarf_type.is_some()); + + let header_pos = analyzer + .plan_variable_access_by_name( + &ctx, + "r", + &ghostscope_dwarf::VariableAccessPath::fields(["header_in", "pos"]), + )? + .ok_or_else(|| anyhow::anyhow!("expected access plan for 'r.header_in.pos'"))?; + + assert_eq!(header_pos.name, "r.header_in.pos"); + assert!( + header_pos.availability.is_available(), + "access plan: {header_pos:?}" + ); + assert!( + header_pos.dwarf_type.is_some(), + "access plan should carry final member type: {header_pos:?}" + ); + + Ok(()) +} + #[tokio::test] async fn test_memcmp_infers_len_for_member_pointer_key_data_o2() -> anyhow::Result<()> { init(); @@ -105,6 +307,11 @@ trace {}:68 {{ result.target_info, result.failed_targets ); + assert_eq!( + result.trace_count, + result.uprobe_configs.len(), + "trace_count should report generated uprobe configs" + ); Ok(()) } @@ -166,6 +373,119 @@ trace {}:68 {{ Ok(()) } +#[tokio::test] +async fn test_local_unknown_member_reports_compile_error_o2() -> anyhow::Result<()> { + init(); + + let binary_path = + FIXTURES.get_test_binary_with_opt("member_pointer_program", OptimizationLevel::O2)?; + let source_path = binary_path + .parent() + .ok_or_else(|| anyhow::anyhow!("member_pointer_program has no parent directory"))? + .join("member_pointer_program.c"); + let script = format!( + r#" +trace {}:68 {{ + print r.no_such_member; +}} +"#, + source_path.display() + ); + + let result = compile_member_pointer_script(&script, OptimizationLevel::O2).await?; + assert!( + result.uprobe_configs.is_empty(), + "invalid member access should not produce uprobe configs: {result:?}" + ); + assert!( + !result.failed_targets.is_empty(), + "invalid member access should report failed targets: {result:?}" + ); + let message = result + .failed_targets + .iter() + .map(|target| target.error_message.as_str()) + .collect::>() + .join("\n"); + assert!( + message.contains("Unknown member 'no_such_member'"), + "unexpected compile error: {message}" + ); + Ok(()) +} + +#[tokio::test] +async fn test_direct_address_unknown_member_returns_compile_error_o2() -> anyhow::Result<()> { + init(); + + let pc = member_pointer_pc(OptimizationLevel::O2).await?; + let script = format!( + r#" +trace 0x{pc:x} {{ + print r.no_such_member; +}} +"# + ); + + let err = compile_member_pointer_script_result(&script, OptimizationLevel::O2) + .await? + .expect_err("invalid direct-address member access should fail compile_script"); + let message = err.user_message().into_owned(); + + assert!( + message.contains("Failed targets:"), + "expected failed-target details in compile error: {message}" + ); + assert!( + message.contains(&format!("0x{pc:x}")), + "expected direct address target in compile error: {message}" + ); + assert!( + message.contains("Unknown member 'no_such_member'"), + "unexpected compile error: {message}" + ); + Ok(()) +} + +#[tokio::test] +async fn test_module_address_unknown_member_returns_compile_error_o2() -> anyhow::Result<()> { + init(); + + let binary_path = + FIXTURES.get_test_binary_with_opt("member_pointer_program", OptimizationLevel::O2)?; + let module = binary_path + .file_name() + .ok_or_else(|| anyhow::anyhow!("member_pointer_program has no file name"))? + .to_string_lossy(); + let pc = member_pointer_pc(OptimizationLevel::O2).await?; + let script = format!( + r#" +trace {module}:0x{pc:x} {{ + print r.no_such_member; +}} +"# + ); + + let err = compile_member_pointer_script_result(&script, OptimizationLevel::O2) + .await? + .expect_err("invalid module-address member access should fail compile_script"); + let message = err.user_message().into_owned(); + + assert!( + message.contains("Failed targets:"), + "expected failed-target details in compile error: {message}" + ); + assert!( + message.contains(&format!("{module}:0x{pc:x}")), + "expected module-qualified target in compile error: {message}" + ); + assert!( + message.contains("Unknown member 'no_such_member'"), + "unexpected compile error: {message}" + ); + Ok(()) +} + #[tokio::test] async fn test_member_pointer_fixture_builds_each_optimized_variant() -> anyhow::Result<()> { init(); @@ -212,10 +532,10 @@ async fn test_complex_bitfield_chain_planner_resolves_member_offsets() -> anyhow for module_address in &addrs { let active = analyzer - .plan_chain_access(module_address, "c", &["active".to_string()])? + .plan_chain_access_read_plan(module_address, "c", &["active".to_string()])? .ok_or_else(|| anyhow::anyhow!("missing plan for c.active at {:?}", module_address))?; let flags = analyzer - .plan_chain_access(module_address, "c", &["flags".to_string()])? + .plan_chain_access_read_plan(module_address, "c", &["flags".to_string()])? .ok_or_else(|| anyhow::anyhow!("missing plan for c.flags at {:?}", module_address))?; let expected_steps = vec![ @@ -228,20 +548,16 @@ async fn test_complex_bitfield_chain_planner_resolves_member_offsets() -> anyhow ghostscope_dwarf::ComputeStep::PushConstant(64), ghostscope_dwarf::ComputeStep::Add, ]; - let expected_eval = ghostscope_dwarf::EvaluationResult::MemoryLocation( - ghostscope_dwarf::LocationResult::ComputedLocation { - steps: expected_steps, - }, - ); + let expected_location = ghostscope_dwarf::VariableLocation::ComputedAddress(expected_steps); assert_eq!( - active.evaluation_result, expected_eval, - "unexpected c.active eval at 0x{:x}", + active.location, expected_location, + "unexpected c.active location at 0x{:x}", module_address.address ); assert_eq!( - flags.evaluation_result, expected_eval, - "unexpected c.flags eval at 0x{:x}", + flags.location, expected_location, + "unexpected c.flags location at 0x{:x}", module_address.address ); diff --git a/e2e-tests/tests/optimized_inline_call_value_execution.rs b/e2e-tests/tests/optimized_inline_call_value_execution.rs index 8b504604..4a655843 100644 --- a/e2e-tests/tests/optimized_inline_call_value_execution.rs +++ b/e2e-tests/tests/optimized_inline_call_value_execution.rs @@ -1,7 +1,7 @@ mod common; use common::{init, FIXTURES}; -use ghostscope_dwarf::{DirectValueResult, EvaluationResult}; +use ghostscope_dwarf::VariableLocation; use regex::Regex; use std::path::Path; use std::time::Duration; @@ -47,7 +47,7 @@ async fn run_ghostscope_with_script_for_target( } fn assert_not_internal_call_register_aliases( - parameters: &[ghostscope_dwarf::VariableWithEvaluation], + parameters: &[ghostscope_dwarf::VisibleVariable], address: u64, ) -> anyhow::Result<()> { let original_x = parameters @@ -60,13 +60,13 @@ fn assert_not_internal_call_register_aliases( .ok_or_else(|| anyhow::anyhow!("missing original_y at 0x{:x}", address))?; assert_ne!( - original_x.evaluation_result, - EvaluationResult::DirectValue(DirectValueResult::RegisterValue(5)), + original_x.location, + VariableLocation::RegisterValue { dwarf_reg: 5 }, "original_x aliased consume_pair's first argument register at 0x{address:x}: {parameters:?}" ); assert_ne!( - original_y.evaluation_result, - EvaluationResult::DirectValue(DirectValueResult::RegisterValue(4)), + original_y.location, + VariableLocation::RegisterValue { dwarf_reg: 4 }, "original_y aliased consume_pair's second argument register at 0x{address:x}: {parameters:?}" ); @@ -74,7 +74,7 @@ fn assert_not_internal_call_register_aliases( } fn assert_parameters_are_live_in_registers( - parameters: &[ghostscope_dwarf::VariableWithEvaluation], + parameters: &[ghostscope_dwarf::VisibleVariable], address: u64, ) -> anyhow::Result<()> { for parameter_name in ["original_x", "original_y"] { @@ -84,14 +84,11 @@ fn assert_parameters_are_live_in_registers( .ok_or_else(|| anyhow::anyhow!("missing {parameter_name} at 0x{address:x}"))?; assert!( - !matches!(parameter.evaluation_result, EvaluationResult::Optimized), + !matches!(parameter.location, VariableLocation::OptimizedOut), "{parameter_name} should still be live before consume_pair() at 0x{address:x}: {parameters:?}" ); assert!( - matches!( - parameter.evaluation_result, - EvaluationResult::DirectValue(DirectValueResult::RegisterValue(_)) - ), + matches!(parameter.location, VariableLocation::RegisterValue { .. }), "{parameter_name} should resolve to a direct register value before consume_pair() at 0x{address:x}: {parameters:?}" ); } diff --git a/e2e-tests/tests/optimized_inline_execution.rs b/e2e-tests/tests/optimized_inline_execution.rs index 92bbb8c5..a9ca8c3b 100644 --- a/e2e-tests/tests/optimized_inline_execution.rs +++ b/e2e-tests/tests/optimized_inline_execution.rs @@ -171,17 +171,17 @@ async fn test_optimized_inline_struct_member_access_resolves_inline_parameter_na ); for module_address in &addrs { let planned = analyzer - .plan_chain_access(module_address, "state", &["total_bytes".to_string()]) + .plan_chain_access_read_plan(module_address, "state", &["total_bytes".to_string()]) .map_err(|e| { anyhow::anyhow!( - "exec-path plan_chain_access failed for 0x{:x}: {}", + "exec-path plan_chain_access_read_plan failed for 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( planned.is_some(), - "exec-path plan_chain_access returned None for 0x{:x}", + "exec-path plan_chain_access_read_plan returned None for 0x{:x}", module_address.address ); } @@ -199,17 +199,17 @@ async fn test_optimized_inline_struct_member_access_resolves_inline_parameter_na ); for module_address in &pid_addrs { let planned = pid_analyzer - .plan_chain_access(module_address, "state", &["total_bytes".to_string()]) + .plan_chain_access_read_plan(module_address, "state", &["total_bytes".to_string()]) .map_err(|e| { anyhow::anyhow!( - "pid-backed plan_chain_access failed for 0x{:x}: {}", + "pid-backed plan_chain_access_read_plan failed for 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( planned.is_some(), - "pid-backed plan_chain_access returned None for 0x{:x}", + "pid-backed plan_chain_access_read_plan returned None for 0x{:x}", module_address.address ); } diff --git a/e2e-tests/tests/script_execution.rs b/e2e-tests/tests/script_execution.rs index c7a3f3b1..713e4201 100644 --- a/e2e-tests/tests/script_execution.rs +++ b/e2e-tests/tests/script_execution.rs @@ -513,6 +513,55 @@ trace calculate_something { Ok(()) } +#[tokio::test] +async fn test_address_trace_compile_failure_uses_failed_target_banner() -> anyhow::Result<()> { + init(); + ensure_global_cleanup_registered(); + + let binary_path = FIXTURES.get_test_binary("sample_program")?; + let analyzer = ghostscope_dwarf::DwarfAnalyzer::from_exec_path(&binary_path) + .await + .map_err(|e| anyhow::anyhow!("failed to load DWARF for sample_program: {e}"))?; + let addrs = analyzer.lookup_function_addresses("calculate_something"); + anyhow::ensure!( + !addrs.is_empty(), + "No DWARF addresses found for calculate_something" + ); + let pc = addrs[0].address; + + let script_content = format!( + r#" +trace 0x{pc:x} {{ + let p = "A"; + if memcmp(p, hex("41"), 1) {{ print "OK"; }} else {{ print "NO"; }} +}} +"# + ); + + let (exit_code, _stdout, stderr) = run_ghostscope_with_script(&script_content, 2).await?; + assert!( + exit_code != 0, + "expected non-zero exit due to compile error; stderr={stderr}" + ); + + let has_banner = stderr.contains("No uprobe configurations created") + || stderr.contains("Script compilation failed"); + assert!( + has_banner && stderr.contains("Failed targets:"), + "Expected failed-targets banner for address trace. stderr={stderr}" + ); + assert!( + stderr.contains(&format!("0x{pc:x}")) + && stderr.contains("expression is not a pointer/address"), + "Expected address target and pointer/address reason. stderr={stderr}" + ); + assert!( + !stderr.contains("Code generation error:"), + "User-facing stderr should not expose CodeGen wrapper. stderr={stderr}" + ); + Ok(()) +} + #[tokio::test] async fn test_pointer_ordered_comparison_is_rejected_e2e() -> anyhow::Result<()> { init(); @@ -1021,12 +1070,19 @@ trace sample_program.c:16 { let has_func = stdout.contains("FUNC:"); let has_line16 = stdout.contains("LINE16:"); - assert!( - has_func, - "Expected function-level trace output for {} but none was captured. STDOUT: {}", - opt_level.description(), - stdout - ); + if *opt_level == OptimizationLevel::Debug { + assert!( + has_func, + "Expected function-level trace output for {} but none was captured. STDOUT: {}", + opt_level.description(), + stdout + ); + } else if !has_func { + println!( + "Function-level trace did not fire for {}; calculate_something may be inlined", + opt_level.description() + ); + } assert!( has_line16, "Expected line-level trace output for {} but none was captured. STDOUT: {}", @@ -1112,13 +1168,14 @@ trace sample_program.c:16 { ); } } else { - // In optimized builds, allow optimized-out markers in place of numeric validations, - // but ensure we never emit placeholder zeros. + // In optimized builds, the function body may be fully inlined into + // its caller. If a function-level trace fires, validate it, but do + // not require a hit from an out-of-line symbol that is not called. assert!( - !func_has_placeholder_zero, - "Should not emit placeholder optimized-out values in optimized builds. STDOUT: {stdout}" - ); - if func_validations == 0 && !func_has_optimized_marker { + !func_has_placeholder_zero, + "Should not emit placeholder optimized-out values in optimized builds. STDOUT: {stdout}" + ); + if has_func && func_validations == 0 && !func_has_optimized_marker { panic!( "❌ Expected function-level traces to be either numerically valid or marked as optimized-out. STDOUT: {stdout}" ); diff --git a/ghostscope-compiler/README.md b/ghostscope-compiler/README.md index 7053467e..16195cb0 100644 --- a/ghostscope-compiler/README.md +++ b/ghostscope-compiler/README.md @@ -1,6 +1,13 @@ # ghostscope-compiler -`ghostscope-compiler` turns GhostScope trace definitions into DWARF-aware eBPF programs. It parses the DSL, performs DWARF resolution, and emits IR that targets LLVM's BPF backend. +`ghostscope-compiler` turns GhostScope trace definitions into DWARF-aware eBPF +programs. It parses the DSL, asks `ghostscope-dwarf` for PC-context read plans, +and lowers those plans into IR that targets LLVM's BPF backend. + +The compiler should not reinterpret raw DWARF location expressions itself. DWARF +visibility, optimized-out state, ASLR-sensitive address handling, and semantic +diagnostics belong in `ghostscope-dwarf`; this crate consumes the resulting plan +and focuses on safe code generation. ## Build Requirements - LLVM 18.x with `llvm-config` available on `PATH` (or set `LLVM_CONFIG_PATH`) diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index 4b6902fb..59933111 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -29,7 +29,7 @@ struct PrintVarRuntimeMeta { #[derive(Debug, Clone)] enum ComplexArgSource<'ctx> { RuntimeRead { - eval_result: ghostscope_dwarf::EvaluationResult, + location: ghostscope_dwarf::VariableLocation, dwarf_type: ghostscope_dwarf::TypeInfo, module_for_offsets: Option, }, @@ -48,7 +48,7 @@ enum ComplexArgSource<'ctx> { bytes: Vec, }, AddressValue { - eval_result: ghostscope_dwarf::EvaluationResult, + location: ghostscope_dwarf::VariableLocation, module_for_offsets: Option, }, // Newly added: a value computed in LLVM at runtime (e.g., expression result) @@ -340,7 +340,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - eval_result: var.evaluation_result.clone(), + location: var.location.clone(), module_for_offsets: module_hint, }, }) @@ -351,13 +351,46 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { | E::ArrayAccess(_, _) | E::PointerDeref(_) | E::ChainAccess(_)) => { + if let Some(plan) = self.query_dwarf_for_complex_expr_plan(expr)? { + let pc_address = self.get_compile_time_context()?.pc_address; + let (var_name, dwarf_type, location) = + self.variable_read_plan_to_runtime_read_parts(plan, pc_address)?; + let display_name = if matches!(expr, E::PointerDeref(_)) { + self.expr_to_name(expr) + } else { + var_name + }; + if matches!(location, ghostscope_dwarf::VariableLocation::OptimizedOut) { + return Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type), + access_path: Vec::new(), + data_len: 0, + source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, + }); + } + let data_len = Self::compute_read_size_for_type(&dwarf_type); + if data_len == 0 { + return Err(CodeGenError::TypeSizeNotAvailable(display_name)); + } + let module_hint = self.take_module_hint(); + return Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type.clone()), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::RuntimeRead { + location, + dwarf_type, + module_for_offsets: module_hint, + }, + }); + } + let var = self .query_dwarf_for_complex_expr(expr)? .ok_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))?; - if matches!( - var.evaluation_result, - ghostscope_dwarf::EvaluationResult::Optimized - ) { + if var.availability == ghostscope_dwarf::Availability::OptimizedOut { let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { name: var.name.clone(), }; @@ -385,7 +418,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - eval_result: var.evaluation_result.clone(), + location: var.location.clone(), dwarf_type: dwarf_type.clone(), module_for_offsets: module_hint, }, @@ -397,10 +430,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { if let Some(v) = self.query_dwarf_for_variable(name)? { if let Some(ref t) = v.dwarf_type { // If DWARF reports optimized-out at this PC, emit OptimizedOut type with no data - if matches!( - v.evaluation_result, - ghostscope_dwarf::EvaluationResult::Optimized - ) { + if v.availability == ghostscope_dwarf::Availability::OptimizedOut { let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { name: v.name.clone(), }; @@ -414,12 +444,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, }); } - let is_link_addr = matches!( - v.evaluation_result, - ghostscope_dwarf::EvaluationResult::MemoryLocation( - ghostscope_dwarf::LocationResult::Address(_) - ) - ); + let is_link_addr = + matches!(v.location, ghostscope_dwarf::VariableLocation::Address(_)); if Self::is_simple_typeinfo(t) && !is_link_addr { // Prefer computed value to avoid runtime reads let compiled = self.compile_expr(expr)?; @@ -493,7 +519,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - eval_result: v.evaluation_result.clone(), + location: v.location.clone(), dwarf_type: t.clone(), module_for_offsets: module_hint, }, @@ -516,7 +542,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - eval_result: v.evaluation_result.clone(), + location: v.location.clone(), dwarf_type: t.clone(), module_for_offsets: module_hint, }, @@ -583,7 +609,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { if var.dwarf_type.is_some() { // Determine pointed-to/element type and compute location with scaled offset let index = sign * int_side; - let (eval_result, elem_ty) = + let (location, elem_ty) = self.compute_pointed_location_with_index(ptr_side, index)?; let data_len = Self::compute_read_size_for_type(&elem_ty); let module_hint = self.take_module_hint(); @@ -601,7 +627,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - eval_result, + location, module_for_offsets: module_hint, }, }); @@ -614,7 +640,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - eval_result, + location, dwarf_type: elem_ty, module_for_offsets: module_hint, }, @@ -706,7 +732,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(1) } ComplexArgSource::RuntimeRead { - eval_result, + location, ref dwarf_type, module_for_offsets, } => { @@ -718,7 +744,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { }; self.generate_print_complex_variable_runtime( meta, - &eval_result, + &location, dwarf_type, module_for_offsets.as_deref(), )?; @@ -1930,8 +1956,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { CodeGenError::VariableNotFound(format!("{expr:?}")) })?; let mod_hint = self.take_module_hint(); - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, None, mod_hint.as_deref(), )? @@ -2026,8 +2052,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { || CodeGenError::VariableNotFound(format!("{val_expr:?}")), )?; let mod_hint = self.take_module_hint(); - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, None, mod_hint.as_deref(), )? @@ -2136,8 +2162,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { || CodeGenError::VariableNotFound(format!("{val_expr:?}")), )?; let mod_hint = self.take_module_hint(); - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, None, mod_hint.as_deref(), )? @@ -2213,7 +2239,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { ); let compile_context = self.get_compile_time_context()?.clone(); - let variable_with_eval = match self.query_dwarf_for_variable(var_name)? { + let read_plan = match self.query_dwarf_for_variable(var_name)? { Some(var) => var, None => { return Err(CodeGenError::VariableNotFound(format!( @@ -2224,7 +2250,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { }; // Convert DWARF type information to TypeKind using existing method - let dwarf_type = variable_with_eval.dwarf_type.as_ref().ok_or_else(|| { + let dwarf_type = read_plan.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Variable has no DWARF type information".to_string()) })?; let type_encoding = TypeKind::from(dwarf_type); @@ -3276,7 +3302,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } ComplexArgSource::RuntimeRead { - eval_result, + location, dwarf_type, module_for_offsets, } => { @@ -3290,8 +3316,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; let size_val = i32_type.const_int(a.data_len as u64, false); // Compute source address; if link-time address, apply ASLR offsets via map - let src_addr = self.evaluation_result_to_address_with_hint( - eval_result, + let src_addr = self.variable_location_to_address_with_hint( + location, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -3453,12 +3479,12 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { self.builder.position_at_end(cont2_block); } ComplexArgSource::AddressValue { - eval_result, + location, module_for_offsets, } => { // Compute address (apply ASLR if link-time address) and store as 8 bytes - let addr = self.evaluation_result_to_address_with_hint( - eval_result, + let addr = self.variable_location_to_address_with_hint( + location, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -4329,7 +4355,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Some(var_info) => { info!( "Found DWARF variable: {} = {:?}", - var_name, var_info.evaluation_result + var_name, var_info.location ); // Require DWARF type information @@ -4340,8 +4366,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { })?; let compile_context = self.get_compile_time_context()?; - self.evaluate_result_to_llvm_value( - &var_info.evaluation_result, + self.variable_location_to_llvm_value( + &var_info.location, dwarf_type, var_name, compile_context.pc_address, @@ -4363,7 +4389,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { fn generate_print_complex_variable_runtime( &mut self, meta: PrintVarRuntimeMeta, - eval_result: &ghostscope_dwarf::EvaluationResult, + location: &ghostscope_dwarf::VariableLocation, dwarf_type: &ghostscope_dwarf::TypeInfo, module_hint: Option<&str>, ) -> Result<()> { @@ -4373,7 +4399,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path = %meta.access_path, type_size = dwarf_type.size(), data_len_limit = meta.data_len_limit, - eval = ?eval_result, + location = ?location, "generate_print_complex_variable_runtime: begin" ); // Compute sizes first, then reserve instruction region directly in accumulation buffer @@ -4689,11 +4715,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Compute source address with ASLR-aware helper, honoring module hint // Prefer a previously recorded module path for offsets; fall back handled in helper - let src_addr = self.evaluation_result_to_address_with_hint( - eval_result, - Some(status_ptr), - module_hint, - )?; + let src_addr = + self.variable_location_to_address_with_hint(location, Some(status_ptr), module_hint)?; tracing::trace!(src_addr = %{src_addr}, "generate_print_complex_variable_runtime: computed src_addr"); // Setup common types and casts diff --git a/ghostscope-compiler/src/ebpf/context.rs b/ghostscope-compiler/src/ebpf/context.rs index a998a46d..14803f34 100644 --- a/ghostscope-compiler/src/ebpf/context.rs +++ b/ghostscope-compiler/src/ebpf/context.rs @@ -38,11 +38,13 @@ pub enum CodeGenError { #[error("Builder error: {0}")] Builder(String), - // === Legacy variable management errors === + // === Variable lookup and availability errors === #[error("Variable not found: {0}")] VariableNotFound(String), #[error("Variable not in scope: {0}")] VariableNotInScope(String), + #[error("Variable unavailable: {0}")] + VariableUnavailable(String), #[error("Type error: {0}")] TypeError(String), #[error("Not implemented: {0}")] @@ -121,9 +123,6 @@ pub struct EbpfContext<'ctx, 'dw> { pub scope_stack: Vec>, } -// Temporary alias for backward compatibility during refactoring -pub type NewCodeGen<'ctx, 'dw> = EbpfContext<'ctx, 'dw>; - impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Create a new eBPF code generation context pub fn new( diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index 3d19d92b..f56d1f3f 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -1,12 +1,12 @@ //! DWARF debugging information bridge //! //! This module handles integration with DWARF debug information for -//! variable type resolution and evaluation result processing. +//! variable type resolution and read-plan lowering. use super::context::{CodeGenError, EbpfContext, Result}; use ghostscope_dwarf::{ - ComputeStep, DirectValueResult, EvaluationResult, LocationResult, MemoryAccessSize, TypeInfo, - VariableWithEvaluation, + AddressExpr, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, SectionType, + TypeInfo, VariableAccessPath, VariableAccessSegment, VariableLocation, VariableReadPlan, }; use ghostscope_process::module_probe; use inkwell::values::{BasicValueEnum, IntValue, PointerValue}; @@ -23,10 +23,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { if let Some(analyzer) = self.process_analyzer { if let Some(st) = analyzer.classify_section_for_address(module_path, link_addr) { return match st { - ghostscope_dwarf::core::SectionType::Text => 0, - ghostscope_dwarf::core::SectionType::Rodata => 1, - ghostscope_dwarf::core::SectionType::Data => 2, - ghostscope_dwarf::core::SectionType::Bss => 3, + SectionType::Text => 0, + SectionType::Rodata => 1, + SectionType::Data => 2, + SectionType::Bss => 3, _ => 2, }; } @@ -61,46 +61,83 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { TypeInfo::StructType { .. } | TypeInfo::UnionType { .. } | TypeInfo::ArrayType { .. } ) } - /// Convert EvaluationResult to LLVM value - pub fn evaluate_result_to_llvm_value( + /// Lower a semantic DWARF variable location to an LLVM value. + pub fn variable_location_to_llvm_value( &mut self, - evaluation_result: &EvaluationResult, + location: &VariableLocation, dwarf_type: &TypeInfo, var_name: &str, pc_address: u64, status_ptr: Option>, ) -> Result> { debug!( - "Converting EvaluationResult to LLVM value for variable: {}", + "Converting VariableLocation to LLVM value for variable: {}", var_name ); debug!("Evaluation context PC address: 0x{:x}", pc_address); - // Get pt_regs parameter let pt_regs_ptr = self.get_pt_regs_parameter()?; + let result_size = MemoryAccessSize::from_size(Self::get_dwarf_type_size(dwarf_type)); - match evaluation_result { - EvaluationResult::DirectValue(direct) => { - self.generate_direct_value(direct, pt_regs_ptr) + match location { + VariableLocation::RegisterValue { dwarf_reg } => { + debug!("Generating register value: {dwarf_reg}"); + self.load_register_value(*dwarf_reg, pt_regs_ptr) } - EvaluationResult::MemoryLocation(location) => { - self.generate_memory_location(location, pt_regs_ptr, dwarf_type, status_ptr) + VariableLocation::ComputedValue(steps) => { + debug!("Generating computed value: {} steps", steps.len()); + let runtime_status_ptr = if self.condition_context_active { + Some(self.get_or_create_cond_error_global()) + } else { + status_ptr + }; + self.generate_compute_steps( + steps, + pt_regs_ptr, + Some(result_size), + runtime_status_ptr, + None, + ) } - EvaluationResult::Optimized => { + VariableLocation::ImplicitValue(bytes) => { + debug!("Generating implicit value: {} bytes", bytes.len()); + let mut value: u64 = 0; + for (i, &byte) in bytes.iter().enumerate().take(8) { + value |= (byte as u64) << (i * 8); + } + Ok(self.context.i64_type().const_int(value, false).into()) + } + VariableLocation::AbsoluteAddressValue(_) => { + let runtime_status_ptr = if self.condition_context_active { + Some(self.get_or_create_cond_error_global()) + } else { + status_ptr + }; + self.variable_location_to_address_with_hint(location, runtime_status_ptr, None) + .map(Into::into) + } + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::ComputedAddress(_) => { + self.generate_memory_location(location, dwarf_type, status_ptr) + } + VariableLocation::OptimizedOut => { debug!("Variable {} is optimized out", var_name); - // Return a placeholder value for optimized out variables - Ok(self.context.i64_type().const_zero().into()) + Err(Self::dwarf_expression_unavailable_error( + var_name, + &Availability::OptimizedOut, + pc_address, + )) } - EvaluationResult::Composite(members) => { + VariableLocation::Pieces(pieces) => { debug!( - "Variable {} is composite with {} members", + "Variable {} is composite with {} pieces", var_name, - members.len() + pieces.len() ); - // For now, just return the first member if available - if let Some(first_member) = members.first() { - self.evaluate_result_to_llvm_value( - &first_member.location, + if let Some(first_piece) = pieces.first() { + self.variable_location_to_llvm_value( + &first_piece.location, dwarf_type, var_name, pc_address, @@ -110,13 +147,19 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(self.context.i64_type().const_zero().into()) } } + VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( + "Frame-base-relative variable plan requires resolved frame base".to_string(), + )), + VariableLocation::Unknown => Err(CodeGenError::DwarfError( + "Variable read plan has unknown location".to_string(), + )), } } /// Variant that allows passing an explicit module hint for offsets lookup - pub fn evaluation_result_to_address_with_hint( + pub fn variable_location_to_address_with_hint( &mut self, - evaluation_result: &EvaluationResult, + location: &VariableLocation, status_ptr: Option>, module_hint: Option<&str>, ) -> Result> { @@ -125,83 +168,41 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // always rebased using per-module section offsets (ASLR) to get a runtime address. // - Runtime-derived addresses (register/stack-relative or computed via dereference) // are used as-is and are NOT rebased. - // The caller signals which path we are on by providing the original evaluation_result. + // The caller signals which path we are on by providing the semantic location shape. let pt_regs_ptr = self.get_pt_regs_parameter()?; - // Default assumption: offsets are available unless a lookup proves otherwise. self.store_offsets_found_const(true)?; - match evaluation_result { - EvaluationResult::MemoryLocation(LocationResult::Address(addr)) => { - // Unified: always attempt runtime rebasing via proc_module_offsets - let ctx = self.get_compile_time_context()?; - let module_for_offsets = module_hint - .map(|s| s.to_string()) - .or_else(|| self.current_resolved_var_module_path.clone()) - .unwrap_or_else(|| ctx.module_path.clone()); - let st_code = self.section_code_for_address(&module_for_offsets, *addr); - let cookie = self.cookie_for_module_or_fallback(&module_for_offsets); - let link_val = self.context.i64_type().const_int(*addr, false); - let (rt_addr, found_flag) = - self.generate_runtime_address_from_offsets(link_val, st_code, cookie)?; - if let Some(sp) = status_ptr { - let is_miss = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - found_flag, - self.context.bool_type().const_zero(), - "is_off_miss", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let cur_status = self - .builder - .build_load(self.context.i8_type(), sp, "cur_status") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let is_ok = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - cur_status.into_int_value(), - self.context.i8_type().const_zero(), - "status_is_ok", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let should_store = self - .builder - .build_and(is_miss, is_ok, "store_offsets_unavail") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let new_status = self - .builder - .build_select( - should_store, - self.context - .i8_type() - .const_int( - ghostscope_protocol::VariableStatus::OffsetsUnavailable as u64, - false, - ) - .into(), - cur_status, - "new_status", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(sp, new_status) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - self.store_offsets_found_flag(found_flag)?; - self.current_resolved_var_module_path = None; - Ok(rt_addr) + match location { + VariableLocation::OptimizedOut => { + let pc_address = self + .current_compile_time_context + .as_ref() + .map(|ctx| ctx.pc_address) + .unwrap_or(0); + Err(Self::dwarf_expression_unavailable_error( + "DWARF address expression", + &Availability::OptimizedOut, + pc_address, + )) } - EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { - register, - offset, - .. - }) => { - let reg_val = self.load_register_value(*register, pt_regs_ptr)?; + VariableLocation::Address(expr) => self.address_steps_to_address_with_hint( + &expr.steps, + pt_regs_ptr, + status_ptr, + module_hint, + ), + VariableLocation::AbsoluteAddressValue(expr) => self + .address_steps_to_address_with_hint( + &expr.steps, + pt_regs_ptr, + status_ptr, + module_hint, + ), + VariableLocation::RegisterAddress { dwarf_reg, offset } => { + let reg_val = self.load_register_value(*dwarf_reg, pt_regs_ptr)?; if let BasicValueEnum::IntValue(reg_i) = reg_val { - if let Some(ofs) = offset { - let ofs_val = self.context.i64_type().const_int(*ofs as u64, true); + if *offset != 0 { + let ofs_val = self.context.i64_type().const_int(*offset as u64, true); let sum = self .builder .build_int_add(reg_i, ofs_val, "addr_with_offset") @@ -216,215 +217,175 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { )) } } - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) => { - // Try to fold constant-only address expressions (e.g., global + const offset) - // If foldable, treat as link-time address and apply ASLR offsets via map. - let mut const_stack: Vec = Vec::new(); - let mut foldable = true; - for s in steps.iter() { - match s { - ComputeStep::PushConstant(v) => const_stack.push(*v), - ComputeStep::Add => { - if const_stack.len() >= 2 { - let b = const_stack.pop().unwrap(); - let a = const_stack.pop().unwrap(); - const_stack.push(a.saturating_add(b)); - } else { - foldable = false; - break; - } - } - // Any register load or deref means runtime-derived address; not foldable - ComputeStep::LoadRegister(_) | ComputeStep::Dereference { .. } => { - foldable = false; - break; - } - _ => { - // Unknown/non-add op: treat as non-foldable - foldable = false; - break; - } - } - } + VariableLocation::ComputedAddress(steps) => { + self.address_steps_to_address_with_hint(steps, pt_regs_ptr, status_ptr, module_hint) + } + _ => Err(CodeGenError::NotImplemented( + "Unable to compute address from variable location".to_string(), + )), + } + } - if foldable && const_stack.len() == 1 { - let link_addr_u = const_stack[0] as u64; - let ctx = self.get_compile_time_context()?; - let module_for_offsets = module_hint - .map(|s| s.to_string()) - .or_else(|| self.current_resolved_var_module_path.clone()) - .unwrap_or_else(|| ctx.module_path.clone()); - let st_code = self.section_code_for_address(&module_for_offsets, link_addr_u); - let cookie = self.cookie_for_module_or_fallback(&module_for_offsets); - let link_val = self.context.i64_type().const_int(link_addr_u, false); - let (rt_addr, found_flag) = - self.generate_runtime_address_from_offsets(link_val, st_code, cookie)?; - if let Some(sp) = status_ptr { - let is_miss = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - found_flag, - self.context.bool_type().const_zero(), - "is_off_miss", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let cur_status = self - .builder - .build_load(self.context.i8_type(), sp, "cur_status") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let is_ok = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - cur_status.into_int_value(), - self.context.i8_type().const_zero(), - "status_is_ok", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let should_store = self - .builder - .build_and(is_miss, is_ok, "store_offsets_unavail") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let new_status = self - .builder - .build_select( - should_store, - self.context - .i8_type() - .const_int( - ghostscope_protocol::VariableStatus::OffsetsUnavailable - as u64, - false, - ) - .into(), - cur_status, - "new_status", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(sp, new_status) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - self.current_resolved_var_module_path = None; - return Ok(rt_addr); - } + fn address_steps_to_address_with_hint( + &mut self, + steps: &[ComputeStep], + pt_regs_ptr: PointerValue<'ctx>, + status_ptr: Option>, + module_hint: Option<&str>, + ) -> Result> { + if let Some(link_addr) = Self::fold_constant_address_steps(steps) { + return self.runtime_address_from_link_time_address(link_addr, status_ptr, module_hint); + } - // Attempt: if steps start with PushConstant(base) and first dynamic op is Dereference - // (with no LoadRegister before it), apply ASLR offsets to base and continue - if let Some(ComputeStep::PushConstant(base_const)) = steps.first() { - // Scan until first Dereference or LoadRegister - let mut saw_reg = false; - let mut saw_deref = false; - for s in &steps[1..] { - match s { - ComputeStep::LoadRegister(_) => { - saw_reg = true; - break; - } - ComputeStep::Dereference { .. } => { - saw_deref = true; - break; - } - _ => {} - } + // If a static base is dereferenced before any register dependency, rebase + // the base first and then execute the remaining runtime expression. + if let Some(ComputeStep::PushConstant(base_const)) = steps.first() { + let mut saw_reg = false; + let mut saw_deref = false; + for step in &steps[1..] { + match step { + ComputeStep::LoadRegister(_) => { + saw_reg = true; + break; } - if saw_deref && !saw_reg { - let link_addr_u = *base_const as u64; - let ctx = self.get_compile_time_context()?; - let module_for_offsets = module_hint - .map(|s| s.to_string()) - .or_else(|| self.current_resolved_var_module_path.clone()) - .unwrap_or_else(|| ctx.module_path.clone()); - let st_code = - self.section_code_for_address(&module_for_offsets, link_addr_u); - let cookie = self.cookie_for_module_or_fallback(&module_for_offsets); - let link_val = self.context.i64_type().const_int(link_addr_u, false); - let (rt, found_flag) = - self.generate_runtime_address_from_offsets(link_val, st_code, cookie)?; - if let Some(sp) = status_ptr { - let is_miss = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - found_flag, - self.context.bool_type().const_zero(), - "is_off_miss", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let cur_status = self - .builder - .build_load(self.context.i8_type(), sp, "cur_status") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let is_ok = self - .builder - .build_int_compare( - inkwell::IntPredicate::EQ, - cur_status.into_int_value(), - self.context.i8_type().const_zero(), - "status_is_ok", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let should_store = self - .builder - .build_and(is_miss, is_ok, "store_offsets_unavail") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - let new_status = self - .builder - .build_select( - should_store, - self.context - .i8_type() - .const_int( - ghostscope_protocol::VariableStatus::OffsetsUnavailable - as u64, - false, - ) - .into(), - cur_status, - "new_status", - ) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - self.builder - .build_store(sp, new_status) - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - } - // Execute remaining steps with rt pre-pushed as base - let rest = &steps[1..]; - let val = self.generate_compute_steps( - rest, - pt_regs_ptr, - None, - status_ptr, - Some(rt), - )?; - if let BasicValueEnum::IntValue(i) = val { - return Ok(i); - } else { - return Err(CodeGenError::LLVMError( - "Computed location did not produce integer".to_string(), - )); - } + ComputeStep::Dereference { .. } => { + saw_deref = true; + break; } + _ => {} } + } - // Fallback: execute steps at runtime and use the result directly (no offsets) - let val = - self.generate_compute_steps(steps, pt_regs_ptr, None, status_ptr, None)?; - if let BasicValueEnum::IntValue(i) = val { - Ok(i) - } else { - Err(CodeGenError::LLVMError( + if saw_deref && !saw_reg { + let rt = self.runtime_address_from_link_time_address( + *base_const as u64, + status_ptr, + module_hint, + )?; + let val = self.generate_compute_steps( + &steps[1..], + pt_regs_ptr, + None, + status_ptr, + Some(rt), + )?; + return match val { + BasicValueEnum::IntValue(value) => Ok(value), + _ => Err(CodeGenError::LLVMError( "Computed location did not produce integer".to_string(), - )) - } + )), + }; } - _ => Err(CodeGenError::NotImplemented( - "Unable to compute address from evaluation result".to_string(), + } + + let val = self.generate_compute_steps(steps, pt_regs_ptr, None, status_ptr, None)?; + match val { + BasicValueEnum::IntValue(value) => Ok(value), + _ => Err(CodeGenError::LLVMError( + "Computed location did not produce integer".to_string(), )), } } + fn fold_constant_address_steps(steps: &[ComputeStep]) -> Option { + let mut const_stack: Vec = Vec::new(); + for step in steps { + match step { + ComputeStep::PushConstant(value) => const_stack.push(*value), + ComputeStep::Add => { + let b = const_stack.pop()?; + let a = const_stack.pop()?; + const_stack.push(a.saturating_add(b)); + } + _ => return None, + } + } + + if const_stack.len() == 1 && const_stack[0] >= 0 { + Some(const_stack[0] as u64) + } else { + None + } + } + + fn runtime_address_from_link_time_address( + &mut self, + link_addr: u64, + status_ptr: Option>, + module_hint: Option<&str>, + ) -> Result> { + let ctx = self.get_compile_time_context()?; + let module_for_offsets = module_hint + .map(|s| s.to_string()) + .or_else(|| self.current_resolved_var_module_path.clone()) + .unwrap_or_else(|| ctx.module_path.clone()); + let st_code = self.section_code_for_address(&module_for_offsets, link_addr); + let cookie = self.cookie_for_module_or_fallback(&module_for_offsets); + let link_val = self.context.i64_type().const_int(link_addr, false); + let (rt_addr, found_flag) = + self.generate_runtime_address_from_offsets(link_val, st_code, cookie)?; + self.store_offsets_unavailable_status(status_ptr, found_flag)?; + self.store_offsets_found_flag(found_flag)?; + self.current_resolved_var_module_path = None; + Ok(rt_addr) + } + + fn store_offsets_unavailable_status( + &self, + status_ptr: Option>, + found_flag: IntValue<'ctx>, + ) -> Result<()> { + let Some(sp) = status_ptr else { + return Ok(()); + }; + + let is_miss = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + found_flag, + self.context.bool_type().const_zero(), + "is_off_miss", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let cur_status = self + .builder + .build_load(self.context.i8_type(), sp, "cur_status") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let is_ok = self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + cur_status.into_int_value(), + self.context.i8_type().const_zero(), + "status_is_ok", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let should_store = self + .builder + .build_and(is_miss, is_ok, "store_offsets_unavail") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + let new_status = self + .builder + .build_select( + should_store, + self.context + .i8_type() + .const_int( + ghostscope_protocol::VariableStatus::OffsetsUnavailable as u64, + false, + ) + .into(), + cur_status, + "new_status", + ) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + self.builder + .build_store(sp, new_status) + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + Ok(()) + } + /// Convert DWARF type size to MemoryAccessSize fn dwarf_type_to_memory_access_size(&self, dwarf_type: &TypeInfo) -> MemoryAccessSize { let size = Self::get_dwarf_type_size(dwarf_type); @@ -437,224 +398,68 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - /// Generate LLVM IR for direct value result - fn generate_direct_value( - &mut self, - direct: &DirectValueResult, - pt_regs_ptr: PointerValue<'ctx>, - ) -> Result> { - match direct { - DirectValueResult::Constant(value) => { - debug!("Generating constant: {}", value); - Ok(self - .context - .i64_type() - .const_int(*value as u64, true) - .into()) - } - - DirectValueResult::AbsoluteAddress(value) => { - debug!("Generating rebased absolute address: 0x{value:x}"); - let module_hint = self.current_resolved_var_module_path.clone(); - let status_ptr = if self.condition_context_active { - Some(self.get_or_create_cond_error_global()) - } else { - None - }; - let eval = ghostscope_dwarf::EvaluationResult::MemoryLocation( - ghostscope_dwarf::LocationResult::Address(*value), - ); - self.evaluation_result_to_address_with_hint( - &eval, - status_ptr, - module_hint.as_deref(), - ) - .map(Into::into) - } - - DirectValueResult::ImplicitValue(bytes) => { - debug!("Generating implicit value: {} bytes", bytes.len()); - // Convert bytes to integer value (little-endian) - let mut value: u64 = 0; - for (i, &byte) in bytes.iter().enumerate().take(8) { - value |= (byte as u64) << (i * 8); - } - Ok(self.context.i64_type().const_int(value, false).into()) - } + pub(super) fn variable_read_plan_to_runtime_read_parts( + &self, + plan: VariableReadPlan, + pc_address: u64, + ) -> Result<(String, TypeInfo, VariableLocation)> { + let lowering = plan.bpf_lowering_plan(&self.compile_options.runtime_capabilities); + if !lowering.availability.is_available() + && lowering.availability != Availability::OptimizedOut + { + return Err(Self::dwarf_expression_unavailable_error( + &plan.name, + &lowering.availability, + pc_address, + )); + } - DirectValueResult::RegisterValue(reg_num) => { - debug!("Generating register value: {}", reg_num); - let reg_value = self.load_register_value(*reg_num, pt_regs_ptr)?; - Ok(reg_value) + let dwarf_type = if lowering.availability == Availability::OptimizedOut { + TypeInfo::OptimizedOut { + name: plan.name.clone(), } + } else { + plan.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) + })? + }; - DirectValueResult::ComputedValue { steps, result_size } => { - debug!("Generating computed value: {} steps", steps.len()); - let status_ptr = if self.condition_context_active { - Some(self.get_or_create_cond_error_global()) - } else { - None - }; - self.generate_compute_steps( - steps, - pt_regs_ptr, - Some(*result_size), - status_ptr, - None, - ) - } - } + Ok((plan.name, dwarf_type, plan.location)) } - /// Generate LLVM IR for memory location result + /// Generate LLVM IR for memory-backed variable locations. fn generate_memory_location( &mut self, - location: &LocationResult, - pt_regs_ptr: PointerValue<'ctx>, + location: &VariableLocation, dwarf_type: &TypeInfo, status_ptr: Option>, ) -> Result> { - match location { - // Policy note: - // We decide ASLR rebasing based on the DWARF evaluation RESULT SHAPE, not a - // "global variable" tag. Whenever DWARF yields a link-time address - // (LocationResult::Address) — including file-scope globals, static locals, - // rodata/data/bss, or any constant-folded address — we MUST apply per-module - // section offsets (.text/.rodata/.data/.bss) to obtain the runtime address. - // Conversely, for runtime-derived addresses (RegisterAddress or computed from - // registers/dereferences), we DO NOT rebase. - LocationResult::Address(addr) => { - debug!("Generating absolute address: 0x{:x}", addr); - // Convert link-time address to runtime address using ASLR offsets when available - let module_hint = self.current_resolved_var_module_path.clone(); - let runtime_status_ptr = if self.condition_context_active { - Some(self.get_or_create_cond_error_global()) - } else { - status_ptr - }; - let eval = ghostscope_dwarf::EvaluationResult::MemoryLocation( - ghostscope_dwarf::LocationResult::Address(*addr), - ); - let rt_addr = self.evaluation_result_to_address_with_hint( - &eval, - runtime_status_ptr, - module_hint.as_deref(), - )?; - // Aggregate types (struct/union/array) are represented as pointers in expressions - if self.is_aggregate_type(dwarf_type) { - let ptr_ty = self.context.ptr_type(inkwell::AddressSpace::default()); - let as_ptr = self - .builder - .build_int_to_ptr(rt_addr, ptr_ty, "aggregate_addr_as_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - return Ok(as_ptr.into()); - } - // Use DWARF type size for memory access - let access_size = self.dwarf_type_to_memory_access_size(dwarf_type); - if self.condition_context_active { - self.generate_memory_read_with_status(rt_addr, access_size) - } else { - self.generate_memory_read(rt_addr, access_size, status_ptr) - } - } - - LocationResult::RegisterAddress { - register, - offset, - size, - } => { - debug!( - "Generating register address: reg{} {:+}", - register, - offset.unwrap_or(0) - ); - - // Load register value - let reg_value = self.load_register_value(*register, pt_regs_ptr)?; - - // Add offset if present - let final_addr = if let Some(offset) = offset { - let offset_value = self.context.i64_type().const_int(*offset as u64, true); - if let BasicValueEnum::IntValue(reg_int) = reg_value { - self.builder - .build_int_add(reg_int, offset_value, "addr_with_offset") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))? - } else { - return Err(CodeGenError::RegisterMappingError( - "Register value is not integer".to_string(), - )); - } - } else if let BasicValueEnum::IntValue(reg_int) = reg_value { - reg_int - } else { - return Err(CodeGenError::RegisterMappingError( - "Register value is not integer".to_string(), - )); - }; - // Aggregate types: return pointer instead of reading as scalar - if self.is_aggregate_type(dwarf_type) { - let ptr_ty = self.context.ptr_type(inkwell::AddressSpace::default()); - let as_ptr = self - .builder - .build_int_to_ptr(final_addr, ptr_ty, "aggregate_addr_as_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - return Ok(as_ptr.into()); - } - // Determine memory access size - prefer LocationResult size if available, otherwise use DWARF type - let access_size = size - .map(|s| match s { - 1 => MemoryAccessSize::U8, - 2 => MemoryAccessSize::U16, - 4 => MemoryAccessSize::U32, - _ => MemoryAccessSize::U64, - }) - .unwrap_or_else(|| self.dwarf_type_to_memory_access_size(dwarf_type)); - - if self.condition_context_active { - self.generate_memory_read_with_status(final_addr, access_size) - } else { - self.generate_memory_read(final_addr, access_size, status_ptr) - } - } + let module_hint = self.current_resolved_var_module_path.clone(); + let runtime_status_ptr = if self.condition_context_active { + Some(self.get_or_create_cond_error_global()) + } else { + status_ptr + }; + let addr = self.variable_location_to_address_with_hint( + location, + runtime_status_ptr, + module_hint.as_deref(), + )?; + + if self.is_aggregate_type(dwarf_type) { + let ptr_ty = self.context.ptr_type(inkwell::AddressSpace::default()); + let as_ptr = self + .builder + .build_int_to_ptr(addr, ptr_ty, "aggregate_addr_as_ptr") + .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; + return Ok(as_ptr.into()); + } - LocationResult::ComputedLocation { steps } => { - debug!("Generating computed location: {} steps", steps.len()); - // Execute steps to compute the address - let runtime_status_ptr = if self.condition_context_active { - Some(self.get_or_create_cond_error_global()) - } else { - status_ptr - }; - let addr_value = self.generate_compute_steps( - steps, - pt_regs_ptr, - None, - runtime_status_ptr, - None, - )?; - if let BasicValueEnum::IntValue(addr) = addr_value { - // For aggregate types, return pointer to address instead of loading a value - if self.is_aggregate_type(dwarf_type) { - let ptr_ty = self.context.ptr_type(inkwell::AddressSpace::default()); - let as_ptr = self - .builder - .build_int_to_ptr(addr, ptr_ty, "aggregate_addr_as_ptr") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - return Ok(as_ptr.into()); - } - // Use DWARF type size for memory access - let access_size = self.dwarf_type_to_memory_access_size(dwarf_type); - if self.condition_context_active { - self.generate_memory_read_with_status(addr, access_size) - } else { - self.generate_memory_read(addr, access_size, status_ptr) - } - } else { - Err(CodeGenError::LLVMError( - "Address computation must return integer".to_string(), - )) - } - } + let access_size = self.dwarf_type_to_memory_access_size(dwarf_type); + if self.condition_context_active { + self.generate_memory_read_with_status(addr, access_size) + } else { + self.generate_memory_read(addr, access_size, status_ptr) } } @@ -1030,7 +835,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { fn generate_entry_value_lookup( &mut self, caller_pc_steps: &[ComputeStep], - cases: &[ghostscope_dwarf::core::EntryValueCase], + cases: &[EntryValueCase], pt_regs_ptr: PointerValue<'ctx>, result_size: Option, status_ptr: Option>, @@ -1175,16 +980,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(phi.as_basic_value().into_int_value()) } - /// Query DWARF for complex expression (supports member access, array access, etc.) - pub fn query_dwarf_for_complex_expr( - &mut self, - expr: &crate::script::Expr, - ) -> Result> { - use crate::script::Expr; - - // Expand script alias variables inside the expression so downstream - // DWARF resolvers see the actual DWARF-based expression tree. - // Guard against self-referential or cyclic aliases. + fn expand_dwarf_aliases(&self, expr: &crate::script::Expr) -> Result { fn expand_aliases( ctx: &crate::ebpf::context::EbpfContext<'_, '_>, e: &crate::script::Expr, @@ -1246,7 +1042,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { ))); } if let Some(alias_expr) = ctx.get_alias_variable(head) { - // Expand the alias head, then append member segments let mut acc = expand_aliases(ctx, &alias_expr, visited, depth + 1)?; for seg in &chain[1..] { acc = E::MemberAccess(Box::new(acc), seg.clone()); @@ -1277,970 +1072,318 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } let mut visited = std::collections::HashSet::new(); - let expanded = expand_aliases(self, expr, &mut visited, 0)?; - - match &expanded { - // Simple variable lookup - Expr::Variable(var_name) => self.query_dwarf_for_variable(var_name), + expand_aliases(self, expr, &mut visited, 0) + } - // Member access: obj.field - Expr::MemberAccess(obj_expr, field_name) => { - self.query_dwarf_for_member_access(obj_expr, field_name) - } + pub(super) fn query_dwarf_for_complex_expr_plan( + &mut self, + expr: &crate::script::Expr, + ) -> Result> { + use crate::script::Expr; - // Array access: arr[index] - Expr::ArrayAccess(array_expr, index_expr) => { - self.query_dwarf_for_array_access(array_expr, index_expr) + let expanded = self.expand_dwarf_aliases(expr)?; + match &expanded { + Expr::Variable(var_name) => self.query_dwarf_for_variable_plan(var_name), + Expr::MemberAccess(_, _) + | Expr::ArrayAccess(_, _) + | Expr::ChainAccess(_) + | Expr::PointerDeref(_) => { + if let Some((base, access_path)) = Self::access_path_from_expr(&expanded)? { + self.query_dwarf_for_pc_access_plan(&base, &access_path) + } else { + Ok(None) + } } - - // Chain access: person.name.first - Expr::ChainAccess(chain) => self.query_dwarf_for_chain_access(chain), - - // Pointer dereference: *ptr - Expr::PointerDeref(expr) => self.query_dwarf_for_pointer_deref(expr), - - // Other expression types are not supported for DWARF queries _ => Ok(None), } } - /// Query DWARF for variable information - pub fn query_dwarf_for_variable( + /// Query DWARF for complex expression (supports member access, array access, etc.) + pub fn query_dwarf_for_complex_expr( + &mut self, + expr: &crate::script::Expr, + ) -> Result> { + self.query_dwarf_for_complex_expr_plan(expr) + } + + /// Query DWARF for a PC-sensitive local variable read plan. + fn query_dwarf_for_variable_plan( &mut self, var_name: &str, - ) -> Result> { + ) -> Result> { let context = self.get_compile_time_context()?; let pc_address = context.pc_address; let module_path = context.module_path.clone(); debug!( - "Querying DWARF for variable '{}' at PC 0x{:x} in module '{}'", + "Querying DWARF variable plan for '{}' at PC 0x{:x} in module '{}'", var_name, pc_address, module_path ); let analyzer = self .process_analyzer .ok_or_else(|| CodeGenError::DwarfError("No DWARF analyzer available".to_string()))?; - - let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(module_path.clone()), - pc_address, - ); - - let module_path_owned = module_path; - let lookup_globals = |analyzer: &ghostscope_dwarf::DwarfAnalyzer| -> Result< - Option<(std::path::PathBuf, VariableWithEvaluation)>, - > { - debug!( - "Variable '{}' not found in locals; attempting global lookup", - var_name - ); - let matches = analyzer.find_global_variables_by_name(var_name); - if matches.is_empty() { - return Ok(None); - } - - // Candidate ranking: - // 1) current module + link address - // 2) any module + link address - // 3) current module - // 4) all matches - let preferred: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = matches - .iter() - .filter(|(p, _)| p.to_string_lossy() == module_path_owned.as_str()) - .cloned() - .collect(); - let preferred_with_addr: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = preferred - .iter() - .filter(|(_, info)| info.link_address.is_some()) - .cloned() - .collect(); - let with_addr: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = matches - .iter() - .filter(|(_, info)| info.link_address.is_some()) - .cloned() - .collect(); - - let candidates: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = if !preferred_with_addr.is_empty() { - preferred_with_addr - } else if !with_addr.is_empty() { - with_addr - } else if !preferred.is_empty() { - preferred - } else { - matches - }; - - if candidates.len() == 1 { - let (mpath, info) = &candidates[0]; - let gv = analyzer - .resolve_variable_by_offsets_in_module( - mpath, - info.unit_offset, - info.die_offset, - ) - .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; - return Ok(Some((mpath.clone(), gv))); - } - - // Ambiguous candidates: resolve each candidate and prefer the one with a concrete type size. - let mut resolved: Vec<(std::path::PathBuf, VariableWithEvaluation)> = Vec::new(); - let mut resolved_with_size: Vec<(std::path::PathBuf, VariableWithEvaluation)> = - Vec::new(); - for (mpath, info) in candidates.iter() { - let gv = match analyzer.resolve_variable_by_offsets_in_module( - mpath, - info.unit_offset, - info.die_offset, - ) { - Ok(v) => v, - Err(err) => { - debug!( - "Skipping unresolved global candidate '{}' in '{}': {}", - var_name, - mpath.display(), - err - ); - continue; - } - }; - let ty_size = gv.dwarf_type.as_ref().map(|t| t.size()).unwrap_or(0); - if ty_size > 0 { - resolved_with_size.push((mpath.clone(), gv.clone())); - } - resolved.push((mpath.clone(), gv)); - } - - if resolved_with_size.len() == 1 { - return Ok(resolved_with_size.into_iter().next()); - } - - if resolved.len() == 1 { - return Ok(resolved.into_iter().next()); - } - - let ambiguous_count = if resolved_with_size.len() > 1 { - resolved_with_size.len() - } else if !resolved.is_empty() { - resolved.len() - } else { - candidates.len() - }; - debug!("Global '{var_name}' is ambiguous across modules ({ambiguous_count} candidates)"); - Err(CodeGenError::DwarfError(format!( - "Ambiguous global '{var_name}': {ambiguous_count} matches" - ))) - }; - - match analyzer.get_all_variables_at_address(&module_address) { - Ok(vars) => { - if let Some(var_result) = vars.iter().find(|v| v.name == var_name).or_else(|| { - let prefix = format!("{var_name}@"); - vars.iter().find(|v| v.name.starts_with(&prefix)) - }) { - debug!("Found DWARF variable '{}' in locals/params", var_name); - Ok(Some(var_result.clone())) - } else if let Some((mpath, gv)) = lookup_globals(analyzer)? { - self.current_resolved_var_module_path = - Some(mpath.to_string_lossy().to_string()); - Ok(Some(gv)) - } else { - Ok(None) + let prefer_module = std::path::PathBuf::from(module_path); + let module_address = + ghostscope_dwarf::ModuleAddress::new(prefer_module.clone(), pc_address); + + let pc_plan = match analyzer.resolve_pc(&module_address) { + Ok(pc_context) => match analyzer.plan_variable_by_name(&pc_context, var_name) { + Ok(Some(plan)) => { + debug!("Found DWARF variable '{}' via PC variable plan", var_name); + Some(plan) } - } - Err(e) => { - debug!( - "DWARF local lookup error for '{}': {e}; falling back to globals", - var_name - ); - if let Some((mpath, gv)) = lookup_globals(analyzer)? { - self.current_resolved_var_module_path = - Some(mpath.to_string_lossy().to_string()); - Ok(Some(gv)) - } else { - Ok(None) + Ok(None) => { + debug!( + "Variable '{}' not found in PC variable plan; trying global read plan", + var_name + ); + None } - } - } - } - - /// Get DWARF type size in bytes - pub fn get_dwarf_type_size(dwarf_type: &TypeInfo) -> u64 { - match dwarf_type { - TypeInfo::BaseType { size, .. } => *size, - TypeInfo::PointerType { size, .. } => *size, - TypeInfo::ArrayType { total_size, .. } => total_size.unwrap_or(0), - TypeInfo::StructType { size, .. } => *size, - TypeInfo::UnionType { size, .. } => *size, - TypeInfo::EnumType { size, .. } => *size, - TypeInfo::BitfieldType { - underlying_type, .. - } => { - // Read size equals the storage type size - Self::get_dwarf_type_size(underlying_type) - } - TypeInfo::TypedefType { - underlying_type, .. - } => Self::get_dwarf_type_size(underlying_type), - TypeInfo::QualifiedType { - underlying_type, .. - } => Self::get_dwarf_type_size(underlying_type), - TypeInfo::FunctionType { .. } => 8, // Function pointer size - TypeInfo::UnknownType { .. } => 0, - TypeInfo::OptimizedOut { .. } => 0, // Optimized out has no size - } - } - - /// Query DWARF for member access (obj.field) - pub fn query_dwarf_for_member_access( - &mut self, - obj_expr: &crate::script::Expr, - field_name: &str, - ) -> Result> { - // Generic path: try to resolve the base expression first and add constant member offset - if !matches!(obj_expr, crate::script::Expr::Variable(_)) { - if let Some(base_var) = self.query_dwarf_for_complex_expr(obj_expr)? { - if let Some(base_ty) = base_var.dwarf_type.as_ref() { - fn find_member_offset_and_type( - t: &ghostscope_dwarf::TypeInfo, - field: &str, - ) -> Option<(u64, ghostscope_dwarf::TypeInfo)> { - match t { - ghostscope_dwarf::TypeInfo::StructType { members, .. } - | ghostscope_dwarf::TypeInfo::UnionType { members, .. } => { - for m in members { - if m.name == field { - return Some((m.offset, m.member_type.clone())); - } - } - None - } - ghostscope_dwarf::TypeInfo::TypedefType { - underlying_type, .. - } - | ghostscope_dwarf::TypeInfo::QualifiedType { - underlying_type, .. - } => find_member_offset_and_type(underlying_type, field), - _ => None, - } - } - // Optional auto-deref for pointer-to-aggregate - let mut effective_ty = base_ty.clone(); - let mut effective_eval = base_var.evaluation_result.clone(); - // unwrap typedef/qualifier for pointer detection - fn unwrap_typedef( - mut t: &ghostscope_dwarf::TypeInfo, - ) -> &ghostscope_dwarf::TypeInfo { - while let ghostscope_dwarf::TypeInfo::TypedefType { - underlying_type, .. - } - | ghostscope_dwarf::TypeInfo::QualifiedType { - underlying_type, - .. - } = t - { - t = underlying_type.as_ref(); - } - t - } - let unwrapped = unwrap_typedef(&effective_ty); - if let ghostscope_dwarf::TypeInfo::PointerType { target_type, .. } = unwrapped { - // Insert a dereference step into evaluation - effective_eval = self.compute_pointer_dereference(&effective_eval)?; - effective_ty = *target_type.clone(); - } - - if let Some((member_off, member_ty)) = - find_member_offset_and_type(&effective_ty, field_name) + Err(err) => { + let message = err.to_string(); + if message.starts_with("Ambiguous variable") + || message.starts_with("Unavailable variable") { - use ghostscope_dwarf::{ - ComputeStep as CS, EvaluationResult as ER, LocationResult as LR, - }; - let new_eval = match &effective_eval { - ER::MemoryLocation(LR::Address(a)) => { - ER::MemoryLocation(LR::Address(a + member_off)) - } - ER::MemoryLocation(LR::ComputedLocation { steps }) => { - let mut s = steps.clone(); - s.push(CS::PushConstant(member_off as i64)); - s.push(CS::Add); - ER::MemoryLocation(LR::ComputedLocation { steps: s }) - } - ER::MemoryLocation(LR::RegisterAddress { - register, - offset, - size, - }) => { - let new_off = offset.unwrap_or(0).saturating_add(member_off as i64); - ER::MemoryLocation(LR::RegisterAddress { - register: *register, - offset: Some(new_off), - size: *size, - }) - } - _ => { - return Err(CodeGenError::NotImplemented( - "Member access on non-addressable expression".to_string(), - )) - } - }; - let name = format!("{}.{}", base_var.name, field_name); - let v = VariableWithEvaluation { - name, - type_name: member_ty.type_name(), - dwarf_type: Some(member_ty), - evaluation_result: new_eval, - scope_depth: base_var.scope_depth, - is_parameter: base_var.is_parameter, - is_artificial: base_var.is_artificial, - }; - return Ok(Some(v)); - } - } - } - // Try a planner-based chain if generic path missed and base is a pure identifier chain - { - fn flatten_ident_chain<'a>( - e: &'a crate::script::Expr, - out: &mut Vec<&'a str>, - ) -> bool { - match e { - crate::script::Expr::Variable(name) => { - out.push(name.as_str()); - true - } - crate::script::Expr::MemberAccess(obj, field) => { - if flatten_ident_chain(obj, out) { - out.push(field.as_str()); - true - } else { - false - } - } - _ => false, + return Err(CodeGenError::DwarfError(message)); } + debug!( + "PC variable plan lookup error for '{}': {message}; trying global read plan", + var_name + ); + None } - let mut segs: Vec<&str> = Vec::new(); - if flatten_ident_chain(obj_expr, &mut segs) && !segs.is_empty() { - let mut chain: Vec = segs.into_iter().map(|s| s.to_string()).collect(); - chain.push(field_name.to_string()); - return self.query_dwarf_for_chain_access(&chain); - } - } - // fall through to legacy variable-only behavior - } - // Support simple variable base and fall back to global/static lowering - if let crate::script::Expr::Variable(base_name) = obj_expr { - let ctx = self.get_compile_time_context()?; - let module_path = ctx.module_path.clone(); - let pc_address = ctx.pc_address; - let analyzer = self.process_analyzer.ok_or_else(|| { - CodeGenError::DwarfError("No DWARF analyzer available".to_string()) - })?; - let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(module_path.clone()), - pc_address, - ); - // Try current module at PC first - match analyzer.plan_chain_access(&module_address, base_name, &[field_name.to_string()]) - { - Ok(Some(var)) => return Ok(Some(var)), - Ok(None) => {} - Err(e) => { - tracing::debug!("member planner miss at current module: {}", e); - } + }, + Err(err) => { + debug!( + "PC context resolution failed for '{}': {err}; trying global read plan", + var_name + ); + None } + }; - // Strict cross-module chain planning via analyzer API - match analyzer - .plan_global_chain_access( - &std::path::PathBuf::from(module_path.clone()), - base_name, - &[field_name.to_string()], - ) - .map_err(|e| CodeGenError::DwarfError(e.to_string()))? - { - Some((mpath, v)) => { - self.current_resolved_var_module_path = - Some(mpath.to_string_lossy().to_string()); - Ok(Some(v)) - } - None => { - // Friendly unknown-member message for globals: try to resolve the base's type - // and list available members. - // We only attempt this for globals to avoid scanning locals aggressively. - let mut matches = analyzer.find_global_variables_by_name(base_name); - if !matches.is_empty() { - // Prefer current module - let preferred: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = matches - .iter() - .filter(|(p, _)| p.to_string_lossy() == module_path.as_str()) - .cloned() - .collect(); - let chosen = if preferred.len() == 1 { - Some(preferred[0].clone()) - } else if preferred.is_empty() && matches.len() == 1 { - Some(matches.remove(0)) - } else { - None - }; - if let Some((mp, info)) = chosen { - if let Ok(var) = analyzer.resolve_variable_by_offsets_in_module( - &mp, - info.unit_offset, - info.die_offset, - ) { - if let Some(ty) = var.dwarf_type.as_ref() { - // Unwrap aliases - let mut t = ty; - loop { - match t { - ghostscope_dwarf::TypeInfo::TypedefType { - underlying_type, - .. - } => t = underlying_type.as_ref(), - ghostscope_dwarf::TypeInfo::QualifiedType { - underlying_type, - .. - } => t = underlying_type.as_ref(), - _ => break, - } - } - let mut kind: Option<&'static str> = None; - let mut member_names: Vec = Vec::new(); - match t { - ghostscope_dwarf::TypeInfo::StructType { - members, .. - } => { - kind = Some("struct"); - member_names = - members.iter().map(|m| m.name.clone()).collect(); - } - ghostscope_dwarf::TypeInfo::UnionType { - members, .. - } => { - kind = Some("union"); - member_names = - members.iter().map(|m| m.name.clone()).collect(); - } - _ => {} - } - if let Some(k) = kind { - // Form friendly message consistent with tests - // Example: Unknown member 'no_such_member' in struct 'G_STATE'. Known members: a, b, c - member_names.sort(); - member_names.dedup(); - let list = if member_names.is_empty() { - "".to_string() - } else { - member_names.join(", ") - }; - let msg = format!( - "Unknown member '{field_name}' in {k} '{base_name}' (known members: {list})" - ); - return Err(CodeGenError::TypeError(msg)); - } - } - } - } - } - Ok(None) - } - } - } else { - Err(CodeGenError::NotImplemented( - "MemberAccess base must be a simple variable (use chain access)".to_string(), - )) + if pc_plan.is_some() { + return Ok(pc_plan); } - } - /// Query DWARF for array access (arr[index]) - pub fn query_dwarf_for_array_access( - &mut self, - array_expr: &crate::script::Expr, - index_expr: &crate::script::Expr, - ) -> Result> { - // Prefer planner for simple identifier chains like a.b.c as array base to avoid nested member lookups - if let crate::script::Expr::MemberAccess(_, _) = array_expr { - // Try to flatten to a chain of identifiers - fn flatten_chain<'a>(e: &'a crate::script::Expr, out: &mut Vec<&'a str>) -> bool { - match e { - crate::script::Expr::Variable(name) => { - out.push(name.as_str()); - true - } - crate::script::Expr::MemberAccess(obj, field) => { - if flatten_chain(obj, out) { - out.push(field.as_str()); - true - } else { - false - } - } - _ => false, - } - } - let mut segs: Vec<&str> = Vec::new(); - if flatten_chain(array_expr, &mut segs) && !segs.is_empty() { - let ctx = self.get_compile_time_context()?; - let module_path = ctx.module_path.clone(); - let pc_address = ctx.pc_address; - let analyzer = self.process_analyzer.ok_or_else(|| { - CodeGenError::DwarfError("No DWARF analyzer available".to_string()) - })?; - let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(module_path), - pc_address, - ); - let base = segs[0].to_string(); - let rest: Vec = segs[1..].iter().map(|s| s.to_string()).collect(); - if let Ok(Some(var)) = analyzer.plan_chain_access(&module_address, &base, &rest) { - // Use planner result as array base - let base_var = var; - return self.finish_array_access_from_base(base_var, index_expr); - } - } + if let Some((global_module, plan)) = analyzer + .plan_global_chain_access_read_plan(&prefer_module, var_name, &[]) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))? + { + debug!("Found DWARF global '{}' via variable read plan", var_name); + self.current_resolved_var_module_path = + Some(global_module.to_string_lossy().to_string()); + return Ok(Some(plan)); } - // Fallback: resolve the base array via generic complex expr path - let base_var = match self.query_dwarf_for_complex_expr(array_expr)? { - Some(var) => var, - None => return Ok(None), - }; - - self.finish_array_access_from_base(base_var, index_expr) + debug!("Variable '{var_name}' not found in read plans"); + Ok(None) } - fn finish_array_access_from_base( - &mut self, - base_var: VariableWithEvaluation, - index_expr: &crate::script::Expr, - ) -> Result> { - // Get the array's type - let array_type = match &base_var.dwarf_type { - Some(type_info) => type_info, - None => return Ok(None), - }; - - // Extract element type from array type - let element_type = match array_type { - TypeInfo::ArrayType { element_type, .. } => element_type.as_ref().clone(), - _ => return Ok(None), // Not an array type - }; - - // Calculate element size for address computation - let element_size = element_type.size(); - - // For indexing, create a computed location representing: base + (index * element_size) - // Only literal integer indices are supported at this stage - let index_value: i64 = match index_expr { - crate::script::Expr::Int(v) => *v, - _ => { - return Err(CodeGenError::NotImplemented( - "Only literal integer array indices are supported (TODO)".to_string(), - )) - } - }; - let element_evaluation_result = match &base_var.evaluation_result { - EvaluationResult::DirectValue(_) => { - // If base is a value, we can't do array indexing - return Ok(None); - } - EvaluationResult::MemoryLocation(location) => { - match location { - // Address(base): perform Address arithmetic so ASLR logic applies uniformly - LocationResult::Address(addr) => { - let offs = (index_value as i128) * (element_size as i128); - let new_addr = (*addr as i128).saturating_add(offs); - if new_addr < 0 { - return Err(CodeGenError::LLVMError( - "negative address after indexing".to_string(), - )); - } - EvaluationResult::MemoryLocation(LocationResult::Address(new_addr as u64)) - } - // Register/Computed: build compute steps at runtime - _ => { - let array_access_steps = - self.create_array_access_steps(location, element_size, index_value); - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { - steps: array_access_steps, - }) - } - } - } - EvaluationResult::Optimized => { - return Ok(None); - } - EvaluationResult::Composite(_) => { - // Array access on composite locations is complex, skip for now - return Ok(None); - } - }; - - // Build readable element name: base_name[index] - let elem_name = format!("{}[{}]", base_var.name, index_value); - let element_var = VariableWithEvaluation { - name: elem_name, - type_name: Self::type_info_to_name(&element_type), - dwarf_type: Some(element_type), - evaluation_result: element_evaluation_result, - scope_depth: base_var.scope_depth, - is_parameter: false, - is_artificial: false, - }; + /// Query DWARF for variable information + pub fn query_dwarf_for_variable(&mut self, var_name: &str) -> Result> { + let context = self.get_compile_time_context()?; + let pc_address = context.pc_address; + + debug!( + "Querying DWARF for variable '{}' at PC 0x{:x} in module '{}'", + var_name, pc_address, context.module_path + ); - Ok(Some(element_var)) + self.query_dwarf_for_variable_plan(var_name) } - /// Query DWARF for chain access (person.name.first) - pub fn query_dwarf_for_chain_access( + fn query_dwarf_for_pc_access_plan( &mut self, - chain: &[String], - ) -> Result> { - if chain.is_empty() { - return Ok(None); - } - // If chain has only one element, treat it as a simple variable and reuse variable lookup. - if chain.len() == 1 { - return self.query_dwarf_for_variable(&chain[0]); + base_name: &str, + access_path: &VariableAccessPath, + ) -> Result> { + if access_path.segments.is_empty() { + return self.query_dwarf_for_variable_plan(base_name); } - // Planner path only; do not fallback. If planning fails, surface an error. - let ctx = self.get_compile_time_context()?; - let module_path = ctx.module_path.clone(); - let pc_address = ctx.pc_address; + + let path_text = Self::access_path_to_string(base_name, access_path); + let context = self.get_compile_time_context()?; + let pc_address = context.pc_address; + let module_path = context.module_path.clone(); + let prefer_module = std::path::PathBuf::from(module_path.clone()); let analyzer = self .process_analyzer .ok_or_else(|| CodeGenError::DwarfError("No DWARF analyzer available".to_string()))?; - // First attempt: current module at current PC (locals/params) - let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(module_path.clone()), - pc_address, - ); - match analyzer.plan_chain_access(&module_address, &chain[0], &chain[1..]) { - Ok(Some(var)) => return Ok(Some(var)), - Ok(None) => {} - Err(e) => { - // Treat planner errors as a miss and continue to global fallback - tracing::debug!("chain planner miss at current module: {}", e); + let module_address = + ghostscope_dwarf::ModuleAddress::new(prefer_module.clone(), pc_address); + + match analyzer.resolve_pc(&module_address) { + Ok(pc_context) => { + match analyzer.plan_variable_access_by_name(&pc_context, base_name, access_path) { + Ok(Some(plan)) => { + debug!("Found DWARF access '{path_text}' via PC variable access plan"); + return Ok(Some(plan)); + } + Ok(None) => {} + Err(err) => { + let message = err.to_string(); + debug!( + "PC variable access plan lookup failed for '{path_text}': {message}" + ); + return Err(CodeGenError::DwarfError(message)); + } + } + } + Err(err) => { + debug!( + "PC context resolution failed for '{path_text}': {err}; trying global read plan" + ); } } - let base = &chain[0]; - let rest = &chain[1..]; - match analyzer - .plan_global_chain_access(&std::path::PathBuf::from(module_path.clone()), base, rest) - .map_err(|e| CodeGenError::DwarfError(e.to_string()))? + if let Some((module_path, plan)) = analyzer + .plan_global_access_read_plan(&prefer_module, base_name, access_path) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))? { - Some((mpath, v)) => { - self.current_resolved_var_module_path = Some(mpath.to_string_lossy().to_string()); - Ok(Some(v)) - } - None => { - // Friendly message for unknown member on global in simple two-segment chains - if chain.len() == 2 { - let field_name = &chain[1]; - let mut matches = analyzer.find_global_variables_by_name(base); - if !matches.is_empty() { - let preferred: Vec<( - std::path::PathBuf, - ghostscope_dwarf::core::GlobalVariableInfo, - )> = matches - .iter() - .filter(|(p, _)| p.to_string_lossy() == module_path.as_str()) - .cloned() - .collect(); - let chosen = if preferred.len() == 1 { - Some(preferred[0].clone()) - } else if preferred.is_empty() && matches.len() == 1 { - Some(matches.remove(0)) - } else { - None - }; - if let Some((mp, info)) = chosen { - if let Ok(var) = analyzer.resolve_variable_by_offsets_in_module( - &mp, - info.unit_offset, - info.die_offset, - ) { - if let Some(ty) = var.dwarf_type.as_ref() { - // Unwrap typedef/qualified - let mut t = ty; - loop { - match t { - ghostscope_dwarf::TypeInfo::TypedefType { - underlying_type, - .. - } => t = underlying_type.as_ref(), - ghostscope_dwarf::TypeInfo::QualifiedType { - underlying_type, - .. - } => t = underlying_type.as_ref(), - _ => break, - } - } - let mut kind: Option<&'static str> = None; - let mut member_names: Vec = Vec::new(); - match t { - ghostscope_dwarf::TypeInfo::StructType { - members, .. - } => { - kind = Some("struct"); - member_names = - members.iter().map(|m| m.name.clone()).collect(); - } - ghostscope_dwarf::TypeInfo::UnionType { - members, .. - } => { - kind = Some("union"); - member_names = - members.iter().map(|m| m.name.clone()).collect(); - } - _ => {} - } - if let Some(k) = kind { - member_names.sort(); - member_names.dedup(); - let list = if member_names.is_empty() { - "".to_string() - } else { - member_names.join(", ") - }; - let msg = format!( - "Unknown member '{field_name}' in {k} '{base}' (known members: {list})" - ); - return Err(CodeGenError::TypeError(msg)); - } - } - } - } - } + debug!("Found DWARF global access '{path_text}' via variable read plan"); + self.current_resolved_var_module_path = Some(module_path.to_string_lossy().to_string()); + return Ok(Some(plan)); + } + + Ok(None) + } + + fn access_path_to_string(base_name: &str, access_path: &VariableAccessPath) -> String { + let mut out = base_name.to_string(); + for segment in &access_path.segments { + match segment { + VariableAccessSegment::Field(field) => { + out.push('.'); + out.push_str(field); + } + VariableAccessSegment::ArrayIndex(index) => { + out.push('['); + out.push_str(&index.to_string()); + out.push(']'); + } + VariableAccessSegment::Dereference => { + out.push_str(".*"); } - Ok(None) } } - // unreachable + out } - /// Query DWARF for pointer dereference (*ptr) - pub fn query_dwarf_for_pointer_deref( - &mut self, + fn access_path_from_expr( expr: &crate::script::Expr, - ) -> Result> { - // First, resolve the pointer expression - let ptr_var = match self.query_dwarf_for_complex_expr(expr)? { - Some(var) => var, - None => return Ok(None), - }; - - // Get the pointer's type - let ptr_type = match &ptr_var.dwarf_type { - Some(type_info) => type_info, - None => return Ok(None), - }; - - // Extract pointed-to type from pointer type - let mut pointed_type = match ptr_type { - TypeInfo::PointerType { target_type, .. } => target_type.as_ref().clone(), - _ => return Ok(None), // Not a pointer type - }; - - // Upgrade UnknownType(target_name) using analyzer/type index to get a shallow type. - // 1) Struct/union/class/enum: try analyzer shallow lookup by name (module-scoped first) - // 2) Do not guess builtin type sizes — rely only on DWARF base type entries - if let TypeInfo::UnknownType { name } = &pointed_type { - let mut candidate_names: Vec = Vec::new(); - if !name.is_empty() && name != "void" { - candidate_names.push(name.clone()); - } - // Fallback: derive from pointer variable's pretty type name, e.g., "GlobalState*" => "GlobalState" - if candidate_names.is_empty() { - let tn = ptr_var.type_name.trim().to_string(); - if let Some(idx) = tn.find('*') { - let mut base = tn[..idx].trim().to_string(); - // Strip common qualifiers and tags - for prefix in [ - "const ", - "volatile ", - "restrict ", - "struct ", - "class ", - "union ", - ] { - if base.starts_with(prefix) { - base = base[prefix.len()..].trim().to_string(); - } - } - if !base.is_empty() && base != "void" { - candidate_names.push(base); - } + ) -> Result> { + fn append_segments( + expr: &crate::script::Expr, + segments: &mut Vec, + ) -> Result> { + match expr { + crate::script::Expr::Variable(name) => Ok(Some(name.clone())), + crate::script::Expr::ChainAccess(chain) => { + let Some(base) = chain.first() else { + return Ok(None); + }; + segments.extend(chain[1..].iter().cloned().map(VariableAccessSegment::Field)); + Ok(Some(base.clone())) } - } - let ctx = self.get_compile_time_context()?; - let module_path = ctx.module_path.clone(); - if let Some(analyzer) = self.process_analyzer { - let mut alias_used: Option = None; - for n in candidate_names { - // Prefer cross-module definitions first to avoid forward decls with size=0 in current CU - let mut upgraded: Option = None; - // struct/class - if let Some(ti) = analyzer.resolve_struct_type_shallow_by_name(&n) { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - if upgraded.is_none() { - if let Some(ti) = - analyzer.resolve_struct_type_shallow_by_name_in_module(&module_path, &n) - { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - } - // union - if upgraded.is_none() { - if let Some(ti) = analyzer.resolve_union_type_shallow_by_name(&n) { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - } - if upgraded.is_none() { - if let Some(ti) = - analyzer.resolve_union_type_shallow_by_name_in_module(&module_path, &n) - { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - } - // enum - if upgraded.is_none() { - if let Some(ti) = analyzer.resolve_enum_type_shallow_by_name(&n) { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - } - if upgraded.is_none() { - if let Some(ti) = - analyzer.resolve_enum_type_shallow_by_name_in_module(&module_path, &n) - { - if ti.size() > 0 { - upgraded = Some(ti); - } - } - } - if let Some(ti) = upgraded { - pointed_type = ti; - alias_used = Some(n.clone()); - break; - } + crate::script::Expr::MemberAccess(obj, field) => { + let Some(base) = append_segments(obj, segments)? else { + return Ok(None); + }; + segments.push(VariableAccessSegment::Field(field.clone())); + Ok(Some(base)) } - - // If we upgraded to an aggregate and have an alias name, wrap it as a typedef - if let Some(alias) = alias_used { - match &pointed_type { - TypeInfo::StructType { .. } - | TypeInfo::UnionType { .. } - | TypeInfo::EnumType { .. } => { - pointed_type = TypeInfo::TypedefType { - name: alias, - underlying_type: Box::new(pointed_type.clone()), - }; - } - _ => {} - } + crate::script::Expr::ArrayAccess(array, index) => { + let Some(base) = append_segments(array, segments)? else { + return Ok(None); + }; + let crate::script::Expr::Int(index) = index.as_ref() else { + return Err(CodeGenError::NotImplemented( + "Only literal integer array indices are supported (TODO)".to_string(), + )); + }; + segments.push(VariableAccessSegment::ArrayIndex(*index)); + Ok(Some(base)) + } + crate::script::Expr::PointerDeref(inner) => { + let Some(base) = append_segments(inner, segments)? else { + return Ok(None); + }; + segments.push(VariableAccessSegment::Dereference); + Ok(Some(base)) } + _ => Ok(None), } } - // Create dereferenced variable - let deref_var = VariableWithEvaluation { - name: format!("*{}", Self::expr_to_string(expr)), - type_name: Self::type_info_to_name(&pointed_type), - dwarf_type: Some(pointed_type), - evaluation_result: self.compute_pointer_dereference(&ptr_var.evaluation_result)?, - scope_depth: ptr_var.scope_depth, - is_parameter: false, - is_artificial: false, + let mut segments = Vec::new(); + let Some(base) = append_segments(expr, &mut segments)? else { + return Ok(None); }; + Ok(Some((base, VariableAccessPath::new(segments)))) + } - Ok(Some(deref_var)) + /// Get DWARF type size in bytes + pub fn get_dwarf_type_size(dwarf_type: &TypeInfo) -> u64 { + match dwarf_type { + TypeInfo::BaseType { size, .. } => *size, + TypeInfo::PointerType { size, .. } => *size, + TypeInfo::ArrayType { total_size, .. } => total_size.unwrap_or(0), + TypeInfo::StructType { size, .. } => *size, + TypeInfo::UnionType { size, .. } => *size, + TypeInfo::EnumType { size, .. } => *size, + TypeInfo::BitfieldType { + underlying_type, .. + } => { + // Read size equals the storage type size + Self::get_dwarf_type_size(underlying_type) + } + TypeInfo::TypedefType { + underlying_type, .. + } => Self::get_dwarf_type_size(underlying_type), + TypeInfo::QualifiedType { + underlying_type, .. + } => Self::get_dwarf_type_size(underlying_type), + TypeInfo::FunctionType { .. } => 8, // Function pointer size + TypeInfo::UnknownType { .. } => 0, + TypeInfo::OptimizedOut { .. } => 0, // Optimized out has no size + } } /// Helper: Compute pointer dereference fn compute_pointer_dereference( &self, - ptr_result: &EvaluationResult, - ) -> Result { - use ghostscope_dwarf::{ComputeStep, LocationResult, MemoryAccessSize}; - - match ptr_result { - // If the pointer is a memory location, we need to read that location first, - // then use the result as an address for another read - EvaluationResult::MemoryLocation(location) => { - let steps = [ - self.location_to_compute_steps(location), - // Then dereference the pointer (read from the computed address) - vec![ComputeStep::Dereference { - size: MemoryAccessSize::U64, - }], - ] - .concat(); - - Ok(EvaluationResult::MemoryLocation( - LocationResult::ComputedLocation { steps }, - )) + ptr_location: &VariableLocation, + ) -> Result { + match ptr_location { + VariableLocation::AbsoluteAddressValue(expr) => { + Ok(VariableLocation::Address(expr.clone())) } - // If the pointer value is held directly (common for function parameters) - // interpret the value as an address to the pointed-to object. - EvaluationResult::DirectValue(dv) => { - use ghostscope_dwarf::DirectValueResult as DV; - match dv { - DV::RegisterValue(reg) => Ok(EvaluationResult::MemoryLocation( - LocationResult::RegisterAddress { - register: *reg, - offset: None, - size: None, - }, - )), - DV::Constant(val) => Ok(EvaluationResult::MemoryLocation( - LocationResult::Address(*val as u64), - )), - DV::AbsoluteAddress(val) => Ok(EvaluationResult::MemoryLocation( - LocationResult::Address(*val), - )), - DV::ImplicitValue(bytes) => { - // Assemble up to 8 bytes little-endian into u64 - let mut v: u64 = 0; - for (i, b) in bytes.iter().take(8).enumerate() { - v |= (*b as u64) << (8 * i); - } - Ok(EvaluationResult::MemoryLocation(LocationResult::Address(v))) - } - DV::ComputedValue { steps, .. } => Ok(EvaluationResult::MemoryLocation( - LocationResult::ComputedLocation { - steps: steps.clone(), - }, - )), + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::ComputedAddress(_) => { + let mut steps = self.variable_location_to_compute_steps(ptr_location)?; + steps.push(ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }); + Ok(VariableLocation::ComputedAddress(steps)) + } + VariableLocation::RegisterValue { dwarf_reg } => { + Ok(VariableLocation::RegisterAddress { + dwarf_reg: *dwarf_reg, + offset: 0, + }) + } + VariableLocation::ComputedValue(steps) => { + Ok(VariableLocation::ComputedAddress(steps.clone())) + } + VariableLocation::ImplicitValue(bytes) => { + let mut value: u64 = 0; + for (i, byte) in bytes.iter().take(8).enumerate() { + value |= (*byte as u64) << (8 * i); } + Ok(VariableLocation::Address(AddressExpr::constant(value))) } _ => Err(CodeGenError::NotImplemented( "Unsupported pointer dereference scenario".to_string(), @@ -2249,134 +1392,91 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } /// Helper: Convert location to compute steps - fn location_to_compute_steps(&self, location: &LocationResult) -> Vec { - use ghostscope_dwarf::{ComputeStep, LocationResult}; - + fn variable_location_to_compute_steps( + &self, + location: &VariableLocation, + ) -> Result> { match location { - LocationResult::Address(addr) => { - vec![ComputeStep::PushConstant(*addr as i64)] - } - LocationResult::RegisterAddress { - register, offset, .. - } => { - let mut steps = vec![ComputeStep::LoadRegister(*register)]; - if let Some(offset) = offset { + VariableLocation::Address(expr) => Ok(expr.steps.clone()), + VariableLocation::AbsoluteAddressValue(expr) => Ok(expr.steps.clone()), + VariableLocation::RegisterAddress { dwarf_reg, offset } => { + let mut steps = vec![ComputeStep::LoadRegister(*dwarf_reg)]; + if *offset != 0 { steps.push(ComputeStep::PushConstant(*offset)); steps.push(ComputeStep::Add); } - steps + Ok(steps) } - LocationResult::ComputedLocation { steps } => steps.clone(), - } - } - - /// Helper: Convert expression to string for debugging - fn expr_to_string(expr: &crate::script::Expr) -> String { - use crate::script::Expr; - - match expr { - Expr::Variable(name) => name.clone(), - Expr::MemberAccess(obj, field) => format!("{}.{}", Self::expr_to_string(obj), field), - Expr::ArrayAccess(arr, _) => format!("{}[index]", Self::expr_to_string(arr)), - Expr::ChainAccess(chain) => chain.join("."), - Expr::PointerDeref(expr) => format!("*{}", Self::expr_to_string(expr)), - _ => "expr".to_string(), - } - } - - /// Helper: Extract readable name from TypeInfo - fn type_info_to_name(type_info: &TypeInfo) -> String { - match type_info { - TypeInfo::BaseType { name, .. } => name.clone(), - TypeInfo::PointerType { target_type, .. } => { - format!("{}*", Self::type_info_to_name(target_type)) + VariableLocation::ComputedAddress(steps) | VariableLocation::ComputedValue(steps) => { + Ok(steps.clone()) } - TypeInfo::ArrayType { - element_type, - element_count, - .. - } => { - if let Some(count) = element_count { - format!("{}[{}]", Self::type_info_to_name(element_type), count) - } else { - format!("{}[]", Self::type_info_to_name(element_type)) - } + VariableLocation::RegisterValue { dwarf_reg } => { + Ok(vec![ComputeStep::LoadRegister(*dwarf_reg)]) } - TypeInfo::StructType { name, .. } => format!("struct {name}"), - TypeInfo::UnionType { name, .. } => format!("union {name}"), - TypeInfo::EnumType { name, .. } => format!("enum {name}"), - TypeInfo::BitfieldType { - underlying_type, - bit_offset, - bit_size, - } => { - format!( - "bitfield<{}:{}> {}", - bit_offset, - bit_size, - Self::type_info_to_name(underlying_type) - ) + VariableLocation::ImplicitValue(bytes) => { + let mut value: u64 = 0; + for (i, byte) in bytes.iter().take(8).enumerate() { + value |= (*byte as u64) << (8 * i); + } + Ok(vec![ComputeStep::PushConstant(value as i64)]) } - TypeInfo::TypedefType { name, .. } => name.clone(), - TypeInfo::QualifiedType { - underlying_type, .. - } => Self::type_info_to_name(underlying_type), - TypeInfo::FunctionType { .. } => "function".to_string(), - TypeInfo::UnknownType { name } => name.clone(), - TypeInfo::OptimizedOut { name } => format!(" {name}"), + _ => Err(CodeGenError::NotImplemented( + "Unable to convert variable location to compute steps".to_string(), + )), } } - /// Create computation steps for array access: base_address + (index * element_size) - fn create_array_access_steps( + fn add_variable_location_offset( &self, - base_location: &LocationResult, - element_size: u64, - index: i64, - ) -> Vec { - let mut steps = Vec::new(); + location: VariableLocation, + offset: i64, + ) -> Result { + if offset == 0 { + return Ok(location); + } - // First, get the base address computation steps - match base_location { - LocationResult::Address(addr) => { - steps.push(ComputeStep::PushConstant(*addr as i64)); + match location { + VariableLocation::RegisterAddress { + dwarf_reg, + offset: base_offset, + } => Ok(VariableLocation::RegisterAddress { + dwarf_reg, + offset: base_offset.saturating_add(offset), + }), + VariableLocation::Address(expr) => { + let mut steps = expr.steps; + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + Ok(VariableLocation::ComputedAddress(steps)) } - LocationResult::RegisterAddress { - register, offset, .. - } => { - steps.push(ComputeStep::LoadRegister(*register)); - if let Some(offset) = offset { - if *offset != 0 { - steps.push(ComputeStep::PushConstant(*offset)); - steps.push(ComputeStep::Add); - } - } + VariableLocation::AbsoluteAddressValue(expr) => { + let mut steps = expr.steps; + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + Ok(VariableLocation::AbsoluteAddressValue(AddressExpr { + steps, + })) } - LocationResult::ComputedLocation { steps: base_steps } => { - steps.extend(base_steps.clone()); + VariableLocation::ComputedAddress(mut steps) => { + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + Ok(VariableLocation::ComputedAddress(steps)) } + _ => Err(CodeGenError::NotImplemented( + "Unable to apply pointer arithmetic to variable location".to_string(), + )), } - - // Now add array indexing computation: current_address + (index * element_size) - steps.push(ComputeStep::PushConstant(index)); // literal index - steps.push(ComputeStep::PushConstant(element_size as i64)); // element_size - steps.push(ComputeStep::Mul); // index * element_size - steps.push(ComputeStep::Add); // base_address + (index * element_size) - - steps } /// Compute a typed pointed-to location for expressions like `ptr +/- K` where K is an element index. - /// Returns a computed location EvaluationResult along with the pointed-to DWARF type. + /// Returns a computed location along with the pointed-to DWARF type. /// The offset is scaled by the element size of the pointer/array target type. pub fn compute_pointed_location_with_index( &mut self, ptr_expr: &crate::script::Expr, index: i64, - ) -> Result<(EvaluationResult, TypeInfo)> { - use ghostscope_dwarf::{ - ComputeStep, EvaluationResult as ER, LocationResult as LR, TypeInfo, - }; + ) -> Result<(VariableLocation, TypeInfo)> { + use ghostscope_dwarf::TypeInfo; // Resolve the pointer expression via DWARF let ptr_var = self @@ -2427,37 +1527,96 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } }; - // First compute the base pointed-to location for `*ptr_expr` - let base_loc_eval = self.compute_pointer_dereference(&ptr_var.evaluation_result)?; - let base_loc = match &base_loc_eval { - ER::MemoryLocation(loc) => loc, - _ => { - return Err(CodeGenError::DwarfError( - "Failed to compute base location for pointer arithmetic".to_string(), - )) - } - }; - - // Build compute steps: base_address + index * elem_size - let steps = { - let mut s = self.location_to_compute_steps(base_loc); - // scale index by element size (can be negative) - s.push(ComputeStep::PushConstant(index)); - s.push(ComputeStep::PushConstant(elem_size as i64)); - s.push(ComputeStep::Mul); - s.push(ComputeStep::Add); - s - }; + let base_location = self.compute_pointer_dereference(&ptr_var.location)?; + let byte_offset = index.saturating_mul(elem_size as i64); + let location = self.add_variable_location_offset(base_location, byte_offset)?; - Ok((ER::MemoryLocation(LR::ComputedLocation { steps }), elem_ty)) + Ok((location, elem_ty)) } } #[cfg(test)] mod tests { use super::*; + use crate::script::Expr; + use ghostscope_dwarf::Provenance; use inkwell::context::Context as LlvmContext; + #[test] + fn access_path_from_expr_flattens_member_array_member_paths() { + let expr = Expr::MemberAccess( + Box::new(Expr::ArrayAccess( + Box::new(Expr::MemberAccess( + Box::new(Expr::Variable("request".to_string())), + "headers".to_string(), + )), + Box::new(Expr::Int(2)), + )), + "len".to_string(), + ); + + let (base, path) = EbpfContext::<'static, 'static>::access_path_from_expr(&expr) + .expect("access path should parse") + .expect("expression should be flattenable"); + + assert_eq!(base, "request"); + assert_eq!( + path.segments, + vec![ + VariableAccessSegment::Field("headers".to_string()), + VariableAccessSegment::ArrayIndex(2), + VariableAccessSegment::Field("len".to_string()), + ] + ); + assert_eq!( + EbpfContext::<'static, 'static>::access_path_to_string(&base, &path), + "request.headers[2].len" + ); + } + + #[test] + fn access_path_from_expr_rejects_dynamic_array_index() { + let expr = Expr::ArrayAccess( + Box::new(Expr::Variable("items".to_string())), + Box::new(Expr::Variable("idx".to_string())), + ); + + let err = EbpfContext::<'static, 'static>::access_path_from_expr(&expr) + .expect_err("dynamic array index should be rejected"); + + assert!(matches!(err, CodeGenError::NotImplemented(_))); + assert!(err.to_string().contains("literal integer array indices")); + } + + #[test] + fn access_path_from_expr_flattens_pointer_deref_segments() { + let expr = Expr::MemberAccess( + Box::new(Expr::PointerDeref(Box::new(Expr::MemberAccess( + Box::new(Expr::Variable("request".to_string())), + "current".to_string(), + )))), + "state".to_string(), + ); + + let (base, path) = EbpfContext::<'static, 'static>::access_path_from_expr(&expr) + .expect("access path should parse") + .expect("expression should be flattenable"); + + assert_eq!(base, "request"); + assert_eq!( + path.segments, + vec![ + VariableAccessSegment::Field("current".to_string()), + VariableAccessSegment::Dereference, + VariableAccessSegment::Field("state".to_string()), + ] + ); + assert_eq!( + EbpfContext::<'static, 'static>::access_path_to_string(&base, &path), + "request.current.*.state" + ); + } + #[test] fn aggregate_address_returns_pointer_for_struct_and_array() { let llctx = LlvmContext::create(); @@ -2478,9 +1637,9 @@ mod tests { size: 80, members: vec![], }; - let eval = EvaluationResult::MemoryLocation(LocationResult::Address(0x1000)); + let location = VariableLocation::Address(AddressExpr::constant(0x1000)); let v = ctx - .evaluate_result_to_llvm_value(&eval, &st, "S", 0, None) + .variable_location_to_llvm_value(&location, &st, "S", 0, None) .expect("eval"); match v { BasicValueEnum::PointerValue(_) => {} @@ -2498,7 +1657,7 @@ mod tests { total_size: Some(16), }; let v2 = ctx - .evaluate_result_to_llvm_value(&eval, &arr, "A", 0, None) + .variable_location_to_llvm_value(&location, &arr, "A", 0, None) .expect("eval2"); match v2 { BasicValueEnum::PointerValue(_) => {} @@ -2525,9 +1684,9 @@ mod tests { size: 4, encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, }; - let eval = EvaluationResult::MemoryLocation(LocationResult::Address(0x2000)); + let location = VariableLocation::Address(AddressExpr::constant(0x2000)); let v = ctx - .evaluate_result_to_llvm_value(&eval, &bt, "x", 0, None) + .variable_location_to_llvm_value(&location, &bt, "x", 0, None) .expect("eval"); match v { BasicValueEnum::IntValue(_) => {} @@ -2539,6 +1698,175 @@ mod tests { ); } + #[test] + fn absolute_address_value_lowers_as_rebased_direct_value() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "abs_addr_value", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + ctx.__test_ensure_proc_offsets_map().expect("map"); + ctx.__test_alloc_pm_key().expect("pm_key"); + ctx.set_compile_time_context(0, "/nonexistent/module".to_string()); + + let ptr_ty = ghostscope_protocol::TypeInfo::PointerType { + target_type: Box::new(ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }), + size: 8, + }; + let location = VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x2000)); + + let value = ctx + .variable_location_to_llvm_value(&location, &ptr_ty, "ptr", 0, None) + .expect("absolute address value should lower"); + assert!(matches!(value, BasicValueEnum::IntValue(_))); + + let pointee = ctx + .compute_pointer_dereference(&location) + .expect("absolute address value should dereference to memory"); + assert_eq!( + pointee, + VariableLocation::Address(AddressExpr::constant(0x2000)) + ); + } + + #[test] + fn optimized_result_is_rejected_as_unavailable_value() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "optimized_value", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + + let ty = ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + + let err = ctx + .variable_location_to_llvm_value( + &VariableLocation::OptimizedOut, + &ty, + "x", + 0x1234, + None, + ) + .expect_err("optimized value should not lower to a placeholder"); + + assert!( + matches!(err, CodeGenError::VariableUnavailable(_)), + "unexpected error: {err:?}" + ); + assert!(err.to_string().contains("optimized out")); + assert!(err.to_string().contains("0x1234")); + } + + #[test] + fn unavailable_error_formats_structured_dwarf_reason() { + let err = EbpfContext::dwarf_expression_unavailable_error( + "x", + &Availability::Unsupported(ghostscope_dwarf::UnsupportedReason::ExpressionShape { + detail: "estimated BPF stack use 64 bytes exceeds capability limit 16".to_string(), + }), + 0xbeef, + ); + let message = err.to_string(); + + assert!(matches!(err, CodeGenError::VariableUnavailable(_))); + assert!(message.contains("unsupported DWARF expression shape")); + assert!(message.contains("estimated BPF stack use 64 bytes")); + assert!(!message.contains("ExpressionShape")); + } + + #[test] + fn unavailable_error_formats_runtime_requirement() { + let err = EbpfContext::dwarf_expression_unavailable_error( + "ptr", + &Availability::Requires(ghostscope_dwarf::RuntimeRequirement::UserMemoryRead), + 0xcafe, + ); + let message = err.to_string(); + + assert!(matches!(err, CodeGenError::VariableUnavailable(_))); + assert!(message.contains("user-memory read support")); + assert!(!message.contains("UserMemoryRead")); + } + + #[test] + fn read_plan_lowering_uses_compile_option_runtime_capabilities() { + let llctx = LlvmContext::create(); + let mut opts = crate::CompileOptions::default(); + opts.runtime_capabilities.max_bpf_stack_bytes = 0; + let ctx = EbpfContext::new(&llctx, "runtime_caps", Some(0), &opts).expect("ctx"); + let dwarf_type = ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + let plan = VariableReadPlan { + name: "x".to_string(), + type_name: "int".to_string(), + dwarf_type: Some(dwarf_type), + declaration: None, + type_id: None, + location: VariableLocation::Address(AddressExpr::constant(0x1000)), + availability: Availability::Available, + scope_depth: 0, + is_parameter: false, + is_artificial: false, + pc_range: None, + inline_context: None, + provenance: Provenance::DirectDie, + }; + + let err = ctx + .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + .expect_err("zero stack capability should reject the read plan"); + + assert!(matches!(err, CodeGenError::VariableUnavailable(_))); + assert!(err.to_string().contains("capability limit 0")); + } + + #[test] + fn optimized_out_read_plan_preserves_marker_conversion() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let ctx = EbpfContext::new(&llctx, "optimized_marker", Some(0), &opts).expect("ctx"); + let dwarf_type = ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + let plan = VariableReadPlan { + name: "x".to_string(), + type_name: "int".to_string(), + dwarf_type: Some(dwarf_type), + declaration: None, + type_id: None, + location: VariableLocation::OptimizedOut, + availability: Availability::OptimizedOut, + scope_depth: 0, + is_parameter: false, + is_artificial: false, + pc_range: None, + inline_context: None, + provenance: Provenance::DirectDie, + }; + + let (_, marker_type, location) = ctx + .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + .expect("optimized-out runtime metadata should remain printable"); + assert_eq!(location, VariableLocation::OptimizedOut); + assert_eq!( + marker_type, + TypeInfo::OptimizedOut { + name: "x".to_string() + } + ); + } + #[test] fn computed_location_supports_dereference_before_trailing_arithmetic() { let llctx = LlvmContext::create(); @@ -2549,19 +1877,17 @@ mod tests { ctx.__test_alloc_pm_key().expect("pm_key"); ctx.set_compile_time_context(0, "/nonexistent/module".to_string()); - let eval = EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { - steps: vec![ - ComputeStep::PushConstant(0x3000), - ComputeStep::Dereference { - size: MemoryAccessSize::U64, - }, - ComputeStep::PushConstant(16), - ComputeStep::Add, - ], - }); + let location = VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x3000), + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ComputeStep::PushConstant(16), + ComputeStep::Add, + ]); let addr = ctx - .evaluation_result_to_address_with_hint(&eval, None, None) + .variable_location_to_address_with_hint(&location, None, None) .expect("computed address with mid-stream dereference should compile"); assert_eq!(addr.get_type().get_bit_width(), 64); } diff --git a/ghostscope-compiler/src/ebpf/expression.rs b/ghostscope-compiler/src/ebpf/expression.rs index 1364fe62..82ed9fbc 100644 --- a/ghostscope-compiler/src/ebpf/expression.rs +++ b/ghostscope-compiler/src/ebpf/expression.rs @@ -5,7 +5,10 @@ use super::context::{CodeGenError, EbpfContext, Result}; use crate::script::{BinaryOp, Expr}; use aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user; -use ghostscope_dwarf::TypeInfo as DwarfType; +use ghostscope_dwarf::{ + AmbiguityReason, Availability, RuntimeRequirement, TypeInfo as DwarfType, UnsupportedReason, + VariableReadPlan, +}; use inkwell::values::{BasicValueEnum, IntValue}; use inkwell::AddressSpace; use tracing::debug; @@ -342,8 +345,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - return self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + return self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), ); @@ -371,8 +374,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - return self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + return self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), ); @@ -435,8 +438,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } match dty { DwarfType::PointerType { .. } => { - let val_any = self.evaluate_result_to_llvm_value( - &var.evaluation_result, + let val_any = self.variable_location_to_llvm_value( + &var.location, dty, &var.name, self.get_compile_time_context()?.pc_address, @@ -461,8 +464,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), ) @@ -478,8 +481,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), ) @@ -1071,8 +1074,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } match ty { DwarfType::PointerType { .. } => { - let val_any = self.evaluate_result_to_llvm_value( - &var.evaluation_result, + let val_any = self.variable_location_to_llvm_value( + &var.location, ty, &var.name, self.get_compile_time_context()?.pc_address, @@ -1098,8 +1101,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), )? @@ -1692,8 +1695,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { ) })?; let module_hint = self.current_resolved_var_module_path.clone(); - match self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + match self.variable_location_to_address_with_hint( + &var.location, None, module_hint.as_deref(), ) { @@ -1727,8 +1730,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { })?; // Use current resolved hint if available (set during DWARF resolution) let module_hint = self.current_resolved_var_module_path.clone(); - match self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + match self.variable_location_to_address_with_hint( + &var.location, None, module_hint.as_deref(), ) { @@ -2178,7 +2181,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Query DWARF for the complex expression let compile_context = self.get_compile_time_context()?.clone(); - let variable_with_eval = match self.query_dwarf_for_complex_expr(expr)? { + let variable_plan = match self.query_dwarf_for_complex_expr(expr)? { Some(var) => var, None => { let expr_str = Self::expr_to_debug_string(expr); @@ -2186,25 +2189,120 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } }; - let dwarf_type = variable_with_eval.dwarf_type.as_ref().ok_or_else(|| { + let dwarf_type = variable_plan.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) })?; + Self::ensure_dwarf_value_available(&variable_plan, compile_context.pc_address)?; debug!( "compile_dwarf_expression: Found DWARF info for expression '{}' with type: {:?}", - variable_with_eval.name, dwarf_type + variable_plan.name, dwarf_type ); // Use the unified evaluation logic to generate LLVM IR - self.evaluate_result_to_llvm_value( - &variable_with_eval.evaluation_result, + self.variable_location_to_llvm_value( + &variable_plan.location, dwarf_type, - &variable_with_eval.name, + &variable_plan.name, compile_context.pc_address, None, ) } + pub(crate) fn dwarf_expression_unavailable_error( + name: &str, + availability: &Availability, + pc_address: u64, + ) -> CodeGenError { + let reason = Self::format_availability_reason(availability); + CodeGenError::VariableUnavailable(format!( + "'{name}' is {reason}; cannot use it as a value expression at PC 0x{pc_address:x}" + )) + } + + fn format_availability_reason(availability: &Availability) -> String { + match availability { + Availability::OptimizedOut => "optimized out at the selected probe PC".to_string(), + Availability::NotInScope => "not in scope at the selected probe PC".to_string(), + Availability::Unsupported(reason) => { + format!( + "unsupported DWARF semantic shape: {}", + Self::format_unsupported_reason(reason) + ) + } + Availability::Requires(requirement) => { + format!( + "requires unavailable runtime support: {}", + Self::format_runtime_requirement(requirement) + ) + } + Availability::Ambiguous(reason) => { + format!( + "ambiguous DWARF semantic result: {}", + Self::format_ambiguity_reason(reason) + ) + } + Availability::Available | Availability::PartiallyAvailable => "available".to_string(), + } + } + + fn format_unsupported_reason(reason: &UnsupportedReason) -> String { + match reason { + UnsupportedReason::DwarfOp { op } => format!("unsupported DWARF op {op}"), + UnsupportedReason::ExpressionShape { detail } => { + format!("unsupported DWARF expression shape: {detail}") + } + UnsupportedReason::TypeLayout { detail } => { + format!("unsupported type layout: {detail}") + } + UnsupportedReason::AddressClass { detail } => { + format!("unsupported address class: {detail}") + } + UnsupportedReason::RegisterMapping { dwarf_reg } => { + format!("unsupported DWARF register mapping for register {dwarf_reg}") + } + } + } + + fn format_runtime_requirement(requirement: &RuntimeRequirement) -> &'static str { + match requirement { + RuntimeRequirement::CallerFrame => "caller-frame recovery", + RuntimeRequirement::SleepableUprobe => "sleepable uprobe support", + RuntimeRequirement::UserMemoryRead => "user-memory read support", + RuntimeRequirement::DwarfCfiRecovery => "DWARF CFI recovery", + } + } + + fn format_ambiguity_reason(reason: &AmbiguityReason) -> String { + match reason { + AmbiguityReason::InlineContext { detail } => { + format!("ambiguous inline context: {detail}") + } + AmbiguityReason::VariableDeclaration { detail } => { + format!("ambiguous variable declaration: {detail}") + } + AmbiguityReason::TypeResolution { detail } => { + format!("ambiguous type resolution: {detail}") + } + } + } + + pub(crate) fn ensure_dwarf_value_available( + variable: &VariableReadPlan, + pc_address: u64, + ) -> Result<()> { + let availability = variable.availability.clone(); + if availability.is_available() { + Ok(()) + } else { + Err(Self::dwarf_expression_unavailable_error( + &variable.name, + &availability, + pc_address, + )) + } + } + /// Helper: Convert expression to string for debugging fn expr_to_debug_string(expr: &crate::script::Expr) -> String { use crate::script::Expr; @@ -2300,8 +2398,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - let addr = self.evaluation_result_to_address_with_hint( - &var.evaluation_result, + let addr = self.variable_location_to_address_with_hint( + &var.location, status_ptr, module_hint.as_deref(), )?; @@ -2325,8 +2423,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } // Evaluate expression to pointer value and read up to L+1 bytes - let val_any = self.evaluate_result_to_llvm_value( - &var.evaluation_result, + let val_any = self.variable_location_to_llvm_value( + &var.location, var.dwarf_type.as_ref().unwrap(), &var.name, self.get_compile_time_context()?.pc_address, diff --git a/ghostscope-compiler/src/lib.rs b/ghostscope-compiler/src/lib.rs index 4623ae33..f023b2ea 100644 --- a/ghostscope-compiler/src/lib.rs +++ b/ghostscope-compiler/src/lib.rs @@ -1,16 +1,14 @@ // Keep library clippy-clean without allow attributes -// New modular organization pub mod ebpf; -pub mod script; // New instruction generator - // Legacy codegen - kept for reference, not compiled - // pub mod codegen_legacy; - // pub mod codegen_new; +pub mod script; use crate::script::compiler::AstCompiler; use ebpf::context::CodeGenError; +pub use ghostscope_dwarf::RuntimeCapabilities; pub use ghostscope_process::{PidFilterSpec, PidNamespaceId}; use script::parser::ParseError; +use std::borrow::Cow; use tracing::info; pub fn hello() -> &'static str { @@ -40,6 +38,31 @@ impl From for CompileError { } } +impl CompileError { + pub fn user_message(&self) -> Cow<'_, str> { + match self { + CompileError::Parse(err) => Cow::Owned(format!("Parse error: {err}")), + CompileError::CodeGen(err) => err.user_message(), + CompileError::LLVM(message) | CompileError::Other(message) => Cow::Borrowed(message), + } + } +} + +impl CodeGenError { + pub fn user_message(&self) -> Cow<'_, str> { + match self { + CodeGenError::VariableNotInScope(name) => { + Cow::Owned(format!("Use of variable '{name}' outside of its scope")) + } + CodeGenError::VariableUnavailable(message) => Cow::Borrowed(message), + CodeGenError::TypeSizeNotAvailable(name) => Cow::Owned(format!( + "Variable '{name}' has no concrete DWARF size at this probe PC" + )), + _ => Cow::Owned(self.to_string()), + } + } +} + // Public re-exports from script::compiler module pub use script::compiler::{CompilationResult, UProbeConfig}; @@ -92,6 +115,8 @@ pub struct CompileOptions { /// Optional original `-p` input PID for `$input_pid`. /// This is only available in `-p` mode. pub input_pid: Option, + /// Runtime/backend capabilities used to validate DWARF variable read plans. + pub runtime_capabilities: RuntimeCapabilities, } impl Default for CompileOptions { @@ -113,6 +138,7 @@ impl Default for CompileOptions { special_pid_ns: None, proc_offsets_pid_ns: None, input_pid: None, + runtime_capabilities: RuntimeCapabilities::default(), } } } @@ -223,3 +249,30 @@ pub fn generate_file_name_for_ast(pid: Option, binary_path: Option<&str>) - format!("gs_{pid_part}_{exec_part}_ast") } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn user_message_strips_codegen_prefix_for_unavailable_variable() { + let err = CompileError::CodeGen(CodeGenError::VariableUnavailable( + "'x' is optimized out at the selected probe PC".to_string(), + )); + + assert_eq!( + err.user_message().as_ref(), + "'x' is optimized out at the selected probe PC" + ); + } + + #[test] + fn user_message_formats_scope_errors_for_users() { + let err = CompileError::CodeGen(CodeGenError::VariableNotInScope("x".to_string())); + + assert_eq!( + err.user_message().as_ref(), + "Use of variable 'x' outside of its scope" + ); + } +} diff --git a/ghostscope-compiler/src/script/compiler.rs b/ghostscope-compiler/src/script/compiler.rs index b1ae2e44..3544b306 100644 --- a/ghostscope-compiler/src/script/compiler.rs +++ b/ghostscope-compiler/src/script/compiler.rs @@ -4,6 +4,7 @@ use crate::CompileError; use inkwell::context::Context; use std::borrow::Cow; use std::collections::hash_map::DefaultHasher; +use std::fmt::Write as _; use std::hash::{Hash, Hasher}; use std::path::Path; use tracing::{debug, error, info, warn}; @@ -133,7 +134,7 @@ impl<'a> AstCompiler<'a> { } Err(e) => { failed_trace_points += 1; - let error_msg = e.to_string(); + let error_msg = e.user_message().into_owned(); error!( "❌ Failed to process trace point {}: {:?} - Error: {}", index, pattern, error_msg @@ -154,6 +155,14 @@ impl<'a> AstCompiler<'a> { file_path, line_number, } => ft.target_name == format!("{file_path}:{line_number}"), + TracePattern::Address(addr) => { + ft.target_name == format!("0x{addr:x}") + && ft.pc_address == *addr + } + TracePattern::AddressInModule { module, address } => { + ft.target_name == format!("{module}:0x{address:x}") + && ft.pc_address == *address + } _ => false, }); @@ -164,12 +173,21 @@ impl<'a> AstCompiler<'a> { file_path, line_number, } => format!("{file_path}:{line_number}"), + TracePattern::Address(addr) => format!("0x{addr:x}"), + TracePattern::AddressInModule { module, address } => { + format!("{module}:0x{address:x}") + } _ => format!("trace_point_{index}"), }; + let pc_address = match pattern { + TracePattern::Address(addr) => *addr, + TracePattern::AddressInModule { address, .. } => *address, + _ => 0, + }; self.failed_targets.push(FailedTarget { target_name, - pc_address: 0, + pc_address, error_message: error_msg, }); } @@ -197,7 +215,7 @@ impl<'a> AstCompiler<'a> { // All trace points failed - return error with first failure reason error!("All {} trace points failed to process", failed_trace_points); return Err(CompileError::Other( - first_error.unwrap_or_else(|| "All trace points failed".to_string()), + self.format_all_trace_points_failed_error(first_error), )); } @@ -209,15 +227,34 @@ impl<'a> AstCompiler<'a> { self.uprobe_configs.len() ); + let trace_count = self.uprobe_configs.len(); Ok(CompilationResult { uprobe_configs: std::mem::take(&mut self.uprobe_configs), failed_targets: std::mem::take(&mut self.failed_targets), - trace_count: self.uprobe_configs.len(), + trace_count, target_info, next_available_trace_id: self.current_trace_id, }) } + fn format_all_trace_points_failed_error(&self, first_error: Option) -> String { + let mut message = first_error.unwrap_or_else(|| "All trace points failed".to_string()); + if self.failed_targets.is_empty() { + return message; + } + + message.push_str("\n\nFailed targets:\n"); + for failed in &self.failed_targets { + let _ = writeln!( + message, + " - {} at 0x{:x}: {}", + failed.target_name, failed.pc_address, failed.error_message + ); + } + message.push_str("\nTip: fix the reported compile-time errors above."); + message + } + /// Build a detailed error message for SourceLine resolution failures fn describe_source_line_failure(&mut self, file_path: &str, line_number: u32) -> String { let default_msg = @@ -411,7 +448,7 @@ impl<'a> AstCompiler<'a> { self.failed_targets.push(FailedTarget { target_name: format!("{file_path}:{line_number}"), pc_address: module_address.address, - error_message: e.to_string(), + error_message: e.user_message().into_owned(), }); // Continue processing other addresses @@ -494,13 +531,17 @@ impl<'a> AstCompiler<'a> { Ok(()) } Err(e) => { - let error_msg = e.to_string(); + let error_msg = e.user_message().into_owned(); + error!( + "❌ Failed to generate eBPF for address 0x{:x}: {}", + addr, error_msg + ); self.failed_targets.push(FailedTarget { target_name: format!("0x{addr:x}"), pc_address: *addr, - error_message: error_msg.clone(), + error_message: error_msg, }); - Err(CompileError::Other(error_msg)) + Err(e) } } } @@ -578,13 +619,17 @@ impl<'a> AstCompiler<'a> { Ok(()) } Err(e) => { - let error_msg = e.to_string(); + let error_msg = e.user_message().into_owned(); + error!( + "❌ Failed to generate eBPF for module-qualified address {}:0x{:x}: {}", + module, address, error_msg + ); self.failed_targets.push(FailedTarget { target_name: format!("{module}:0x{address:x}"), pc_address: *address, - error_message: error_msg.clone(), + error_message: error_msg, }); - Err(CompileError::Other(error_msg)) + Err(e) } } } @@ -674,7 +719,7 @@ impl<'a> AstCompiler<'a> { self.failed_targets.push(FailedTarget { target_name: func_name.clone(), pc_address: module_address.address, - error_message: e.to_string(), + error_message: e.user_message().into_owned(), }); // Continue processing other addresses @@ -752,8 +797,8 @@ impl<'a> AstCompiler<'a> { } } - // Use new codegen implementation with full AST compilation - let mut codegen_new = crate::ebpf::context::NewCodeGen::new_with_process_analyzer( + // Use the eBPF context implementation with full AST compilation. + let mut codegen = crate::ebpf::context::EbpfContext::new_with_process_analyzer( &context, &ebpf_function_name, self.process_analyzer, @@ -764,7 +809,7 @@ impl<'a> AstCompiler<'a> { // Set compile-time context for DWARF queries if let Some(function_address) = target.function_address { - codegen_new.set_compile_time_context(function_address, target.binary_path.clone()); + codegen.set_compile_time_context(function_address, target.binary_path.clone()); } info!( @@ -773,7 +818,7 @@ impl<'a> AstCompiler<'a> { ); // Use full AST compilation - let (_main_function, trace_context) = codegen_new + let (_main_function, trace_context) = codegen .compile_program( &crate::script::ast::Program { statements: vec![] }, // Empty program - statements passed separately &ebpf_function_name, @@ -782,7 +827,7 @@ impl<'a> AstCompiler<'a> { target.function_address, Some(&target.binary_path), ) - .map_err(|e| CompileError::LLVM(format!("Failed to compile AST program: {e}")))?; + .map_err(CompileError::CodeGen)?; info!( "Generated TraceContext for '{}' with {} strings and {} variables", @@ -791,7 +836,7 @@ impl<'a> AstCompiler<'a> { trace_context.variable_name_count() ); - let module = codegen_new.get_module(); + let module = codegen.get_module(); // Generate eBPF bytecode from LLVM module let ebpf_bytecode = Self::generate_ebpf_bytecode( diff --git a/ghostscope-dwarf/README.md b/ghostscope-dwarf/README.md index 61a82f3a..4b0c54be 100644 --- a/ghostscope-dwarf/README.md +++ b/ghostscope-dwarf/README.md @@ -1,5 +1,17 @@ # ghostscope-dwarf -`ghostscope-dwarf` parses DWARF data from ELF binaries so GhostScope can resolve variables, types, and addresses at runtime. It wraps `gimli`, `object`, `memmap2`, and symbol demanglers, and exposes async-friendly helpers for the rest of the workspace. +`ghostscope-dwarf` is GhostScope's PC-context DWARF semantic layer. It loads +ELF/DWARF data from process modules, resolves source locations, visible +variables, type layouts, globals, and address mappings, then produces semantic +read plans for the compiler. + +Those read plans describe how a value can be read at a specific probe PC, or why +it is unavailable. This keeps DWARF location expressions, optimized-out state, +ASLR-sensitive addresses, shadowing, and unsupported expression diagnostics in +the DWARF crate instead of spreading those decisions through the eBPF compiler. + +The crate wraps `gimli`, `object`, `memmap2`, and symbol demanglers, and exposes +async-friendly helpers for the rest of the workspace. Consumers should prefer +the PC-context planning APIs over interpreting raw DWARF locations directly. Consumers typically do not use this crate directly; it is re-exported via higher-level components. See the main GhostScope docs for usage examples: . diff --git a/ghostscope-dwarf/src/analyzer.rs b/ghostscope-dwarf/src/analyzer/mod.rs similarity index 74% rename from ghostscope-dwarf/src/analyzer.rs rename to ghostscope-dwarf/src/analyzer/mod.rs index 6ecf25d1..3af5ab40 100644 --- a/ghostscope-dwarf/src/analyzer.rs +++ b/ghostscope-dwarf/src/analyzer/mod.rs @@ -2,15 +2,26 @@ use crate::{ core::{ - mapping::ModuleMapping, CallerFrameRecovery, GlobalVariableInfo, ModuleAddress, Result, + mapping::ModuleMapping, CallerFrameRecovery, ModuleAddress, Result, SectionType, SourceLocation, }, objfile::LoadedObjfile, + semantics::{CompactUnwindRow, CompactUnwindTable, PcContext, VisibleVariable}, }; use object::{Object, ObjectSection}; use std::collections::HashMap; use std::path::{Path, PathBuf}; +mod plan_global; +mod plan_pc; +mod type_lookup; + +#[cfg(test)] +use crate::{ + core::{AddressExpr, Availability, Provenance, VariableLocation}, + semantics::VariableReadPlan, +}; + /// Events emitted during module loading process #[derive(Debug, Clone)] pub enum ModuleLoadingEvent { @@ -64,8 +75,8 @@ pub struct AddressQueryResult { pub source_column: Option, pub function_name: Option, pub is_inline: Option, - pub variables: Vec, - pub parameters: Vec, + pub variables: Vec, + pub parameters: Vec, } /// Rich query result for a function lookup across modules. @@ -85,28 +96,6 @@ pub struct DwarfAnalyzer { } impl DwarfAnalyzer { - fn resolve_type_shallow_by_name_in_module_with_tags>( - &self, - module_path: P, - name: &str, - tags: &[gimli::DwTag], - ) -> Option { - let path_buf = module_path.as_ref().to_path_buf(); - self.modules - .get(&path_buf) - .and_then(|module_data| module_data.resolve_type_shallow_by_name_with_tags(name, tags)) - } - - fn resolve_type_shallow_by_name_with_tags( - &self, - name: &str, - tags: &[gimli::DwTag], - ) -> Option { - self.modules - .values() - .find_map(|module_data| module_data.resolve_type_shallow_by_name_with_tags(name, tags)) - } - fn build_address_query_result( &self, module_address: &ModuleAddress, @@ -203,6 +192,28 @@ impl DwarfAnalyzer { }) } + fn sorted_module_paths(&self) -> Vec<&PathBuf> { + let mut paths: Vec<&PathBuf> = self.modules.keys().collect(); + paths.sort(); + paths + } + + /// Return the deterministic per-analyzer module id for a loaded module path. + pub fn module_id_for_path>(&self, module_path: P) -> Option { + let module_path = module_path.as_ref(); + self.sorted_module_paths() + .into_iter() + .position(|path| path.as_path() == module_path) + .map(|index| crate::ModuleId(index as u32)) + } + + /// Resolve a semantic module id back to its loaded module path. + pub fn module_path_for_id(&self, module: crate::ModuleId) -> Option<&Path> { + self.sorted_module_paths() + .get(module.0 as usize) + .map(|path| path.as_path()) + } + /// Create DWARF analyzer from PID (now uses parallel loading) pub async fn from_pid(pid: u32) -> Result { Self::from_pid_parallel(pid).await @@ -219,72 +230,6 @@ impl DwarfAnalyzer { } } - /// Resolve struct/class by name (shallow) in a specific module using only indexes - pub fn resolve_struct_type_shallow_by_name_in_module>( - &self, - module_path: P, - name: &str, - ) -> Option { - self.resolve_type_shallow_by_name_in_module_with_tags( - module_path, - name, - &[ - gimli::constants::DW_TAG_structure_type, - gimli::constants::DW_TAG_class_type, - ], - ) - } - - /// Resolve struct/class by name (shallow) across modules (first match) - pub fn resolve_struct_type_shallow_by_name(&self, name: &str) -> Option { - self.resolve_type_shallow_by_name_with_tags( - name, - &[ - gimli::constants::DW_TAG_structure_type, - gimli::constants::DW_TAG_class_type, - ], - ) - } - - /// Resolve union by name (shallow) in a specific module - pub fn resolve_union_type_shallow_by_name_in_module>( - &self, - module_path: P, - name: &str, - ) -> Option { - self.resolve_type_shallow_by_name_in_module_with_tags( - module_path, - name, - &[gimli::constants::DW_TAG_union_type], - ) - } - - /// Resolve union by name (shallow) across modules (first match) - pub fn resolve_union_type_shallow_by_name(&self, name: &str) -> Option { - self.resolve_type_shallow_by_name_with_tags(name, &[gimli::constants::DW_TAG_union_type]) - } - - /// Resolve enum by name (shallow) in a specific module - pub fn resolve_enum_type_shallow_by_name_in_module>( - &self, - module_path: P, - name: &str, - ) -> Option { - self.resolve_type_shallow_by_name_in_module_with_tags( - module_path, - name, - &[gimli::constants::DW_TAG_enumeration_type], - ) - } - - /// Resolve enum by name (shallow) across modules (first match) - pub fn resolve_enum_type_shallow_by_name(&self, name: &str) -> Option { - self.resolve_type_shallow_by_name_with_tags( - name, - &[gimli::constants::DW_TAG_enumeration_type], - ) - } - /// Create DWARF analyzer from PID using parallel loading pub async fn from_pid_parallel(pid: u32) -> Result { Self::from_pid_parallel_with_config(pid, &[], false, |_event| {}).await @@ -571,48 +516,6 @@ impl DwarfAnalyzer { } } - /// Get all variables visible at the given module address with EvaluationResult - /// - /// # Arguments - /// * `module_address` - Module address containing both module path and address offset - pub fn get_all_variables_at_address( - &self, - module_address: &ModuleAddress, - ) -> Result> { - tracing::info!( - "Looking up variables at address 0x{:x} in module {}", - module_address.address, - module_address.module_display() - ); - - if let Some(module_data) = self.modules.get(&module_address.module_path) { - module_data.get_all_variables_at_address(module_address.address) - } else { - tracing::warn!( - "Module {} not found in loaded modules", - module_address.module_display() - ); - Err(anyhow::anyhow!( - "Module {} not loaded", - module_address.module_display() - )) - } - } - - /// Plan a chain access (e.g., r.headers_in) and synthesize a VariableWithEvaluation - pub fn plan_chain_access( - &self, - module_address: &ModuleAddress, - base_var: &str, - chain: &[String], - ) -> Result> { - if let Some(module_data) = self.modules.get(&module_address.module_path) { - module_data.plan_chain_access(module_address.address, base_var, chain) - } else { - Ok(None) - } - } - /// Recover the direct caller frame at a module address as ComputeStep[]. pub fn recover_caller_frame( &self, @@ -626,174 +529,61 @@ impl DwarfAnalyzer { } } - /// Get all loaded module paths - pub fn get_loaded_modules(&self) -> Vec<&PathBuf> { - self.modules.keys().collect() + /// Recover the direct caller frame at a previously resolved PC context. + pub fn recover_caller_frame_for_context( + &self, + ctx: &PcContext, + registers: &[u16], + ) -> Result> { + let module_address = self.module_address_for_context(ctx)?; + self.recover_caller_frame(&module_address, registers) } - /// Find global/static variables by name across all loaded modules - pub fn find_global_variables_by_name(&self, name: &str) -> Vec<(PathBuf, GlobalVariableInfo)> { - let mut results = Vec::new(); - for (module_path, module_data) in &self.modules { - let vars = module_data.find_global_variables_by_name_any(name); - for v in vars { - results.push((module_path.clone(), v)); - } - } - if !results.is_empty() { - return results; - } - - // Fallback: scan all globals in each module and match by exact or leaf name - for (module_path, module_data) in &self.modules { - let all = module_data.list_all_global_variables(); - for v in all { - let leaf = v.name.rsplit("::").next().unwrap_or(&v.name).to_string(); - if v.name == name || leaf == name { - results.push((module_path.clone(), v)); - } - } - } - - results + /// Build compact unwind rows for the module referenced by a PC context. + pub fn compact_unwind_table_for_context( + &self, + ctx: &PcContext, + ) -> Result> { + let module_path = self + .module_path_for_id(ctx.module) + .ok_or_else(|| anyhow::anyhow!("Semantic module id {:?} is not loaded", ctx.module))?; + self.modules + .get(module_path) + .ok_or_else(|| anyhow::anyhow!("Module {} not loaded", module_path.display()))? + .compact_unwind_table(ctx.module) } - /// Plan a member/chain access across modules focusing on global/static variables. - /// Strict policy and order: - /// 1) Query globals index by base name (prefer current module first). - /// 2) For each candidate: try static-offset lowering when link-time address exists. - /// 3) Fallback to per-module planner at addr=0. - /// - /// Returns None if unresolved; never falls back to unrelated globals. - pub fn plan_global_chain_access( + /// Resolve the compact unwind row that covers a previously resolved PC context. + pub fn compact_unwind_row_for_context( &self, - prefer_module: &PathBuf, - base: &str, - fields: &[String], - ) -> Result> { - // 1) Globals across modules (strict) - let matches = self.find_global_variables_by_name(base); - if matches.is_empty() { - // Strict policy: if no global/base by name exists anywhere, stop here - return Ok(None); - } - - // Build preferred order: prefer current module first - let mut ordered: Vec<(PathBuf, GlobalVariableInfo)> = Vec::new(); - for (mpath, info) in matches.iter() { - if *mpath == *prefer_module { - ordered.push((mpath.clone(), info.clone())); - } - } - for (mpath, info) in matches.into_iter() { - if mpath != *prefer_module { - ordered.push((mpath, info)); - } - } - - for (mpath, info) in ordered.into_iter() { - // 2a) Static-offset lowering when link-time address is available - if let Some(link) = info.link_address { - if let Ok(Some((off, final_ty))) = self.compute_global_member_static_offset( - &mpath, - link, - info.unit_offset, - info.die_offset, - fields, - ) { - let name = if fields.is_empty() { - base.to_string() - } else { - format!("{base}.{}", fields.join(".")) - }; - let var = crate::VariableWithEvaluation { - name, - type_name: final_ty.type_name(), - dwarf_type: Some(final_ty), - evaluation_result: crate::core::EvaluationResult::MemoryLocation( - crate::core::LocationResult::Address(link + off), - ), - scope_depth: 0, - is_parameter: false, - is_artificial: false, - }; - tracing::info!( - "plan_global_chain_access: resolved '{}' in module '{}' via static-offset", - base, - mpath.display() - ); - return Ok(Some((mpath, var))); - } - } - - // 2b) Module planner fallback at addr=0 - let ma = ModuleAddress::new(mpath.clone(), 0); - match self.plan_chain_access(&ma, base, fields) { - Ok(Some(v)) => { - tracing::info!( - "plan_global_chain_access: resolved '{}' in module '{}' via planner", - base, - ma.module_display() - ); - return Ok(Some((mpath, v))); - } - Ok(None) => {} - Err(e) => { - tracing::debug!( - "plan_global_chain_access: planner miss in module '{}': {}", - ma.module_display(), - e - ); - } - } - } - - Ok(None) + ctx: &PcContext, + ) -> Result> { + let module_path = self + .module_path_for_id(ctx.module) + .ok_or_else(|| anyhow::anyhow!("Semantic module id {:?} is not loaded", ctx.module))?; + self.modules + .get(module_path) + .ok_or_else(|| anyhow::anyhow!("Module {} not loaded", module_path.display()))? + .compact_unwind_row(ctx.module, ctx.normalized_pc) } - /// Resolve a variable by CU/DIE offsets in a specific module at an arbitrary address context (for globals) - pub fn resolve_variable_by_offsets_in_module>( + /// Build compact unwind rows for a loaded semantic module id. + pub fn compact_unwind_table_for_module( &self, - module_path: P, - cu_off: gimli::DebugInfoOffset, - die_off: gimli::UnitOffset, - ) -> Result { - let path_buf = module_path.as_ref().to_path_buf(); - if let Some(module_data) = self.modules.get(&path_buf) { - let items = vec![(cu_off, die_off)]; - let vars = module_data.resolve_variables_by_offsets_at_address(0, &items)?; - let mut var = vars.into_iter().next().ok_or_else(|| { - anyhow::anyhow!( - "Failed to resolve variable at offsets {:?}/{:?} in module {}", - cu_off, - die_off, - path_buf.display() - ) - })?; - if var.dwarf_type.is_none() { - if let Some(ti) = module_data.shallow_type_for_variable_offsets(cu_off, die_off) { - var.type_name = ti.type_name(); - var.dwarf_type = Some(ti); - } - } - Ok(var) - } else { - Err(anyhow::anyhow!( - "Module {} not loaded", - module_path.as_ref().display() - )) - } + module: crate::ModuleId, + ) -> Result> { + let module_path = self + .module_path_for_id(module) + .ok_or_else(|| anyhow::anyhow!("Semantic module id {:?} is not loaded", module))?; + self.modules + .get(module_path) + .ok_or_else(|| anyhow::anyhow!("Module {} not loaded", module_path.display()))? + .compact_unwind_table(module) } - /// List all global/static variables with usable addresses across all loaded modules - pub fn list_all_global_variables(&self) -> Vec<(PathBuf, GlobalVariableInfo)> { - let mut results = Vec::new(); - for (module_path, module_data) in &self.modules { - for v in module_data.list_all_global_variables() { - results.push((module_path.clone(), v)); - } - } - results + /// Get all loaded module paths + pub fn get_loaded_modules(&self) -> Vec<&PathBuf> { + self.modules.keys().collect() } /// Classify the section type for a link-time virtual address in a specific module @@ -801,7 +591,7 @@ impl DwarfAnalyzer { &self, module_path: P, vaddr: u64, - ) -> Option { + ) -> Option { let path = module_path.as_ref(); if let Some(module_data) = self.modules.get(path) { module_data.classify_section_for_vaddr(vaddr) @@ -810,26 +600,6 @@ impl DwarfAnalyzer { } } - /// Compute static offset for a global variable member chain - pub fn compute_global_member_static_offset>( - &self, - module_path: P, - link_address: u64, - cu_off: gimli::DebugInfoOffset, - var_die: gimli::UnitOffset, - fields: &[String], - ) -> Result> { - let path_buf = module_path.as_ref().to_path_buf(); - if let Some(module_data) = self.modules.get(&path_buf) { - module_data.compute_global_member_static_offset(cu_off, var_die, link_address, fields) - } else { - Err(anyhow::anyhow!( - "Module {} not loaded", - module_path.as_ref().display() - )) - } - } - /// Lookup function address by name - returns first match /// Returns ModuleAddress for the first function found pub fn lookup_function_address_by_name(&self, function_name: &str) -> Option { @@ -1289,3 +1059,163 @@ pub struct SimpleFileInfo { pub basename: String, pub directory: String, } + +#[cfg(test)] +mod tests { + use super::*; + + fn global_plan(name: &str, address: u64) -> VariableReadPlan { + VariableReadPlan { + name: name.to_string(), + type_name: "int".to_string(), + dwarf_type: Some(crate::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }), + declaration: None, + type_id: None, + location: VariableLocation::Address(AddressExpr::constant(address)), + availability: Availability::Available, + scope_depth: 0, + is_parameter: false, + is_artificial: false, + pc_range: None, + inline_context: None, + provenance: Provenance::Synthesized { + detail: "test".to_string(), + }, + } + } + + fn visible_var(name: &str, scope_depth: usize) -> VisibleVariable { + VisibleVariable { + name: name.to_string(), + type_name: "int".to_string(), + dwarf_type: Some(crate::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }), + declaration: None, + type_id: None, + location: VariableLocation::RegisterValue { dwarf_reg: 0 }, + availability: Availability::Available, + scope_depth, + is_parameter: false, + is_artificial: false, + } + } + + fn diagnostic( + name: &str, + scope_depth: usize, + detail: &str, + ) -> crate::semantics::VariableQueryDiagnostic { + crate::semantics::VariableQueryDiagnostic { + pc: 0x1234, + name: Some(name.to_string()), + scope_depth, + availability: Availability::Unsupported(crate::UnsupportedReason::ExpressionShape { + detail: detail.to_string(), + }), + detail: detail.to_string(), + } + } + + #[test] + fn variable_selection_rejects_inner_diagnostic_over_outer_match() { + let err = DwarfAnalyzer::select_visible_variable_by_name( + 0x1234, + "state", + vec![visible_var("state", 1)], + &[diagnostic("state", 2, "DW_OP_bad is unsupported")], + ) + .expect_err("inner unavailable variable should block outer fallback"); + + assert!(err.to_string().contains("Unavailable variable 'state'")); + assert!(err.to_string().contains("DW_OP_bad is unsupported")); + } + + #[test] + fn variable_selection_keeps_inner_match_over_outer_diagnostic() { + let selected = DwarfAnalyzer::select_visible_variable_by_name( + 0x1234, + "state", + vec![visible_var("state", 2)], + &[diagnostic("state", 1, "outer variable is unavailable")], + ) + .expect("outer diagnostic should not block inner match") + .expect("inner match should be returned"); + + assert_eq!(selected.name, "state"); + assert_eq!(selected.scope_depth, 2); + } + + #[test] + fn global_plan_selection_rejects_ambiguous_matches() { + let err = DwarfAnalyzer::select_unambiguous_global_plan( + "state", + vec![ + (PathBuf::from("/tmp/a"), global_plan("state", 0x1000)), + (PathBuf::from("/tmp/b"), global_plan("state", 0x2000)), + ], + ) + .expect_err("multiple global candidates should be ambiguous"); + + assert!(err.to_string().contains("Ambiguous global 'state'")); + assert!(err.to_string().contains("2 matches")); + } + + #[test] + fn global_plan_selection_accepts_single_match() { + let selected = DwarfAnalyzer::select_unambiguous_global_plan( + "state", + vec![(PathBuf::from("/tmp/a"), global_plan("state", 0x1000))], + ) + .expect("single global candidate should be accepted") + .expect("single global candidate should be returned"); + + assert_eq!(selected.0, PathBuf::from("/tmp/a")); + assert_eq!(selected.1.name, "state"); + } + + #[test] + fn global_plan_selection_prefers_current_module_match() { + let selected = DwarfAnalyzer::select_global_plan_with_preferred_module( + "state", + Path::new("/tmp/current"), + vec![ + (PathBuf::from("/tmp/other"), global_plan("state", 0x2000)), + (PathBuf::from("/tmp/current"), global_plan("state", 0x1000)), + ], + ) + .expect("current module candidate should be accepted") + .expect("current module candidate should be returned"); + + assert_eq!(selected.0, PathBuf::from("/tmp/current")); + assert_eq!( + selected.1.location, + VariableLocation::Address(AddressExpr::constant(0x1000)) + ); + } + + #[test] + fn global_plan_selection_rejects_ambiguous_current_module_matches() { + let err = DwarfAnalyzer::select_global_plan_with_preferred_module( + "state", + Path::new("/tmp/current"), + vec![ + (PathBuf::from("/tmp/current"), global_plan("state", 0x1000)), + (PathBuf::from("/tmp/current"), global_plan("state", 0x1004)), + (PathBuf::from("/tmp/other"), global_plan("state", 0x2000)), + ], + ) + .expect_err("duplicate current-module candidates should be ambiguous"); + + assert!(err.to_string().contains("Ambiguous global 'state'")); + assert!(err.to_string().contains("2 matches")); + assert!(err.to_string().contains("/tmp/current")); + assert!(!err.to_string().contains("/tmp/other")); + } +} diff --git a/ghostscope-dwarf/src/analyzer/plan_global.rs b/ghostscope-dwarf/src/analyzer/plan_global.rs new file mode 100644 index 00000000..a76f3d02 --- /dev/null +++ b/ghostscope-dwarf/src/analyzer/plan_global.rs @@ -0,0 +1,212 @@ +use super::DwarfAnalyzer; +use crate::{ + core::{GlobalVariableInfo, Provenance, Result}, + semantics::{VariableAccessPath, VariableReadPlan}, +}; +use std::path::{Path, PathBuf}; + +impl DwarfAnalyzer { + pub(super) fn select_unambiguous_global_plan( + base: &str, + mut candidates: Vec<(PathBuf, VariableReadPlan)>, + ) -> Result> { + match candidates.len() { + 0 => Ok(None), + 1 => Ok(candidates.pop()), + count => { + let details = candidates + .iter() + .map(|(module_path, plan)| { + let declaration = plan + .declaration + .map(|die| format!(" cu={} die=0x{:x}", die.cu.0, die.offset)) + .unwrap_or_default(); + format!("{}{}", module_path.display(), declaration) + }) + .collect::>() + .join(", "); + Err(anyhow::anyhow!( + "Ambiguous global '{base}': {count} matches [{details}]" + )) + } + } + } + + pub(super) fn select_global_plan_with_preferred_module( + base: &str, + prefer_module: &Path, + candidates: Vec<(PathBuf, VariableReadPlan)>, + ) -> Result> { + let (preferred, fallback): (Vec<_>, Vec<_>) = candidates + .into_iter() + .partition(|(module_path, _)| module_path == prefer_module); + if !preferred.is_empty() { + return Self::select_unambiguous_global_plan(base, preferred); + } + + Self::select_unambiguous_global_plan(base, fallback) + } + + /// Find global/static variables by name across all loaded modules + pub fn find_global_variables_by_name(&self, name: &str) -> Vec<(PathBuf, GlobalVariableInfo)> { + let mut results = Vec::new(); + for (module_path, module_data) in &self.modules { + let vars = module_data.find_global_variables_by_name_any(name); + for v in vars { + results.push((module_path.clone(), v)); + } + } + if !results.is_empty() { + return results; + } + + // Fallback: scan all globals in each module and match by exact or leaf name + for (module_path, module_data) in &self.modules { + let all = module_data.list_all_global_variables(); + for v in all { + let leaf = v.name.rsplit("::").next().unwrap_or(&v.name).to_string(); + if v.name == name || leaf == name { + results.push((module_path.clone(), v)); + } + } + } + + results + } + + /// Plan a global/static member chain as a neutral read plan. + pub fn plan_global_chain_access_read_plan( + &self, + prefer_module: &PathBuf, + base: &str, + fields: &[String], + ) -> Result> { + self.plan_global_access_read_plan( + prefer_module, + base, + &VariableAccessPath::fields(fields.iter().cloned()), + ) + } + + /// Plan a global/static source-level access path as a neutral read plan. + pub fn plan_global_access_read_plan( + &self, + prefer_module: &PathBuf, + base: &str, + path: &VariableAccessPath, + ) -> Result> { + let matches = self.find_global_variables_by_name(base); + if matches.is_empty() { + return Ok(None); + } + + let mut ordered: Vec<(PathBuf, GlobalVariableInfo)> = Vec::new(); + for (module_path, info) in matches.iter() { + if *module_path == *prefer_module { + ordered.push((module_path.clone(), info.clone())); + } + } + for (module_path, info) in matches.into_iter() { + if module_path != *prefer_module { + ordered.push((module_path, info)); + } + } + + let mut direct_matches = Vec::new(); + let mut last_error = None; + for (module_path, info) in ordered { + let base_plan = match self.resolve_variable_read_plan_by_offsets_in_module( + &module_path, + info.unit_offset, + info.die_offset, + Provenance::Synthesized { + detail: "global access".to_string(), + }, + ) { + Ok(plan) => plan, + Err(err) => { + last_error = Some(err); + continue; + } + }; + + match self.plan_access_path_with_type_completion(&module_path, base_plan, path) { + Ok(plan) => direct_matches.push((module_path, plan)), + Err(primary_error) => { + if Self::is_value_backed_aggregate_access_error(&primary_error) { + return Err(primary_error); + } + last_error = Some(primary_error); + } + } + } + + if !direct_matches.is_empty() { + return Self::select_global_plan_with_preferred_module( + base, + prefer_module, + direct_matches, + ); + } + + if let Some(err) = last_error { + return Err(err); + } + Ok(None) + } + + fn resolve_variable_read_plan_by_offsets_in_module>( + &self, + module_path: P, + cu_off: gimli::DebugInfoOffset, + die_off: gimli::UnitOffset, + provenance: Provenance, + ) -> Result { + let variable = self.resolve_variable_by_offsets_in_module(module_path, cu_off, die_off)?; + Ok(Self::read_plan_from_variable(variable, provenance)) + } + + fn resolve_variable_by_offsets_in_module>( + &self, + module_path: P, + cu_off: gimli::DebugInfoOffset, + die_off: gimli::UnitOffset, + ) -> Result { + let path_buf = module_path.as_ref().to_path_buf(); + if let Some(module_data) = self.modules.get(&path_buf) { + let items = vec![(cu_off, die_off)]; + let vars = module_data.resolve_variables_by_offsets_at_address(0, &items)?; + let mut var = vars.into_iter().next().ok_or_else(|| { + anyhow::anyhow!( + "Failed to resolve variable at offsets {:?}/{:?} in module {}", + cu_off, + die_off, + path_buf.display() + ) + })?; + if var.dwarf_type.is_none() { + if let Some(ti) = module_data.shallow_type_for_variable_offsets(cu_off, die_off) { + var.type_name = ti.type_name(); + var.dwarf_type = Some(ti); + } + } + Ok(var) + } else { + Err(anyhow::anyhow!( + "Module {} not loaded", + module_path.as_ref().display() + )) + } + } + + /// List all global/static variables with usable addresses across all loaded modules + pub fn list_all_global_variables(&self) -> Vec<(PathBuf, GlobalVariableInfo)> { + let mut results = Vec::new(); + for (module_path, module_data) in &self.modules { + for v in module_data.list_all_global_variables() { + results.push((module_path.clone(), v)); + } + } + results + } +} diff --git a/ghostscope-dwarf/src/analyzer/plan_pc.rs b/ghostscope-dwarf/src/analyzer/plan_pc.rs new file mode 100644 index 00000000..b99c536f --- /dev/null +++ b/ghostscope-dwarf/src/analyzer/plan_pc.rs @@ -0,0 +1,375 @@ +use super::DwarfAnalyzer; +use crate::{ + core::{ModuleAddress, Provenance, Result}, + semantics::{ + AddressSpaceInfo, PcContext, PcLineInfo, PlanError, VariableAccessPath, + VariableAccessSegment, VariableReadPlan, VisibleVariable, VisibleVariablesResult, + }, +}; +use std::path::Path; + +impl DwarfAnalyzer { + /// Resolve a module-address pair into the first PC-centered semantic context. + /// + /// Today `ModuleAddress.address` is the module/DWARF PC used by the existing + /// query APIs, so `pc` and `normalized_pc` intentionally match. Runtime + /// rebasing details are preserved in `address_space` for future lowering. + pub fn resolve_pc(&self, module_address: &ModuleAddress) -> Result { + let module_data = self + .modules + .get(&module_address.module_path) + .ok_or_else(|| { + anyhow::anyhow!("Module {} not loaded", module_address.module_display()) + })?; + let module = self + .module_id_for_path(&module_address.module_path) + .ok_or_else(|| { + anyhow::anyhow!( + "Module {} has no semantic module id", + module_address.module_display() + ) + })?; + + let (cu, function, lexical_scopes, inline_chain) = module_data + .resolve_pc_scopes(module, module_address.address) + .unwrap_or_else(|error| { + tracing::debug!( + "Failed to resolve semantic PC scopes for {}:0x{:x}: {}", + module_address.module_display(), + module_address.address, + error + ); + (None, None, Vec::new(), Vec::new()) + }); + let source_location = module_data.lookup_source_location(module_address.address); + let line = source_location.map(|location| PcLineInfo { + file_path: location.file_path, + line_number: location.line_number, + column: location.column, + address: location.address, + }); + let function_name = module_data.find_function_name_by_address(module_address.address); + let is_inline = module_data.is_inline_at(module_address.address); + let mapping = module_data.module_mapping(); + + Ok(PcContext { + module, + pc: module_address.address, + normalized_pc: module_address.address, + cu, + function, + function_name, + lexical_scopes, + inline_chain, + is_inline, + line, + address_space: AddressSpaceInfo { + module_path: Some(mapping.path.clone()), + runtime_base: mapping.loaded_address, + link_base: None, + }, + }) + } + + /// Return variables visible at a previously resolved PC context. + pub fn visible_variables(&self, ctx: &PcContext) -> Result> { + Ok(self.visible_variables_with_diagnostics(ctx)?.variables) + } + + /// Return variables visible at a PC context plus non-fatal DWARF diagnostics. + pub fn visible_variables_with_diagnostics( + &self, + ctx: &PcContext, + ) -> Result { + let module_address = self.module_address_for_context(ctx)?; + + let (variables, diagnostics) = self + .modules + .get(&module_address.module_path) + .ok_or_else(|| { + anyhow::anyhow!("Module {} not loaded", module_address.module_display()) + })? + .get_visible_variables_at_address_best_effort_with_diagnostics( + ctx.module, + module_address.address, + )?; + let mut variables: Vec = variables + .into_iter() + .map(|variable| variable.visible_variable()) + .collect(); + + variables.sort_by(|a, b| { + a.scope_depth + .cmp(&b.scope_depth) + .then_with(|| b.is_parameter.cmp(&a.is_parameter)) + .then_with(|| a.name.cmp(&b.name)) + }); + Ok(VisibleVariablesResult { + variables, + diagnostics, + }) + } + + pub(super) fn module_address_for_context(&self, ctx: &PcContext) -> Result { + let module_path = match ctx.address_space.module_path.as_deref() { + Some(path) => path, + None => self.module_path_for_id(ctx.module).ok_or_else(|| { + anyhow::anyhow!("Semantic module id {:?} is not loaded", ctx.module) + })?, + }; + Ok(ModuleAddress::new( + module_path.to_path_buf(), + ctx.normalized_pc, + )) + } + + pub(super) fn is_value_backed_aggregate_access_error(err: &anyhow::Error) -> bool { + err.downcast_ref::() + .is_some_and(PlanError::is_value_backed_aggregate_access) + } + + pub(super) fn read_plan_from_variable( + variable: crate::parser::VariableWithEvaluation, + provenance: Provenance, + ) -> VariableReadPlan { + VariableReadPlan::from_visible_variable(variable.visible_variable(), provenance) + } + + pub(super) fn plan_access_path_with_type_completion( + &self, + module_path: &Path, + mut plan: VariableReadPlan, + path: &VariableAccessPath, + ) -> Result { + for segment in &path.segments { + let pointer_type_name = plan.type_name.clone(); + self.complete_unknown_pointer_target_type(module_path, &mut plan, &pointer_type_name); + plan = plan.plan_access_path(&VariableAccessPath::new(vec![segment.clone()]))?; + if matches!(segment, VariableAccessSegment::Dereference) { + self.complete_unknown_pointer_target_type( + module_path, + &mut plan, + &pointer_type_name, + ); + } + } + + Ok(plan) + } + + /// Plan a visible variable by source name at a previously resolved PC context. + /// + /// Exact names are preferred over producer-synthesized names like `name@...`. + pub fn plan_variable_by_name( + &self, + ctx: &PcContext, + name: &str, + ) -> Result> { + let VisibleVariablesResult { + variables: visible_variables, + diagnostics, + } = self.visible_variables_with_diagnostics(ctx)?; + + Self::select_visible_variable_by_name( + ctx.normalized_pc, + name, + visible_variables, + &diagnostics, + ) + .map(|variable| { + variable.map(|variable| { + VariableReadPlan::from_visible_variable(variable, Provenance::DirectDie) + }) + }) + } + + pub(super) fn select_visible_variable_by_name( + pc: u64, + name: &str, + visible_variables: Vec, + diagnostics: &[crate::semantics::VariableQueryDiagnostic], + ) -> Result> { + let synthesized_prefix = format!("{name}@"); + let matching_diagnostics = diagnostics + .iter() + .filter(|diagnostic| { + diagnostic.name.as_deref().is_some_and(|diagnostic_name| { + diagnostic_name == name || diagnostic_name.starts_with(&synthesized_prefix) + }) + }) + .collect::>(); + + let exact_matches = visible_variables + .iter() + .filter(|variable| variable.name == name) + .cloned() + .collect::>(); + + let mut candidates = if exact_matches.is_empty() { + visible_variables + .into_iter() + .filter(|variable| variable.name.starts_with(&synthesized_prefix)) + .collect::>() + } else { + exact_matches + }; + + if candidates.is_empty() { + if let Some(diagnostic) = matching_diagnostics + .iter() + .max_by_key(|diagnostic| diagnostic.scope_depth) + { + return Err(anyhow::anyhow!( + "Unavailable variable '{name}' at PC 0x{:x}: {}", + pc, + diagnostic.detail + )); + } + return Ok(None); + } + + let max_scope_depth = candidates + .iter() + .map(|variable| variable.scope_depth) + .max() + .unwrap_or(0); + if let Some(diagnostic) = matching_diagnostics + .iter() + .filter(|diagnostic| diagnostic.scope_depth > max_scope_depth) + .max_by_key(|diagnostic| diagnostic.scope_depth) + { + return Err(anyhow::anyhow!( + "Unavailable variable '{name}' at PC 0x{:x}: {}", + pc, + diagnostic.detail + )); + } + candidates.retain(|variable| variable.scope_depth == max_scope_depth); + + if candidates.iter().any(|variable| !variable.is_artificial) { + candidates.retain(|variable| !variable.is_artificial); + } + + candidates.dedup(); + if candidates.len() > 1 { + let names = candidates + .iter() + .map(|variable| variable.name.as_str()) + .collect::>() + .join(", "); + return Err(anyhow::anyhow!( + "Ambiguous variable '{name}' at PC 0x{:x}: candidates [{}]", + pc, + names + )); + } + + Ok(candidates.into_iter().next()) + } + + /// Plan a visible variable by stable variable id at a previously resolved PC context. + /// + /// This is the identity-based path for callers that first enumerate + /// `visible_variables(ctx)` and then request a read plan without relying on + /// a potentially ambiguous source name. + pub fn plan_variable( + &self, + ctx: &PcContext, + variable_id: crate::VariableId, + ) -> Result> { + if variable_id.declaration.module != ctx.module { + return Err(anyhow::anyhow!( + "VariableId module {:?} does not match PcContext module {:?}", + variable_id.declaration.module, + ctx.module + )); + } + + let matches = self + .visible_variables(ctx)? + .into_iter() + .filter(|variable| variable.declaration == Some(variable_id.declaration)) + .collect::>(); + + match matches.as_slice() { + [] => Ok(None), + [variable] => Ok(Some(VariableReadPlan::from_visible_variable( + variable.clone(), + Provenance::DirectDie, + ))), + _ => Err(anyhow::anyhow!( + "Ambiguous VariableId {:?} at PC 0x{:x}: {} visible matches", + variable_id, + ctx.normalized_pc, + matches.len() + )), + } + } + + /// Plan a source-level access path from a visible variable id at a PC context. + pub fn plan_variable_access( + &self, + ctx: &PcContext, + variable_id: crate::VariableId, + path: &VariableAccessPath, + ) -> Result> { + let Some(plan) = self.plan_variable(ctx, variable_id)? else { + return Ok(None); + }; + let module_path = self + .module_path_for_id(ctx.module) + .ok_or_else(|| anyhow::anyhow!("Semantic module id {:?} is not loaded", ctx.module))? + .to_path_buf(); + + self.plan_access_path_with_type_completion(&module_path, plan, path) + .map(Some) + } + + /// Plan a source-level access path from a visible variable at a PC context. + pub fn plan_variable_access_by_name( + &self, + ctx: &PcContext, + name: &str, + path: &VariableAccessPath, + ) -> Result> { + let Some(plan) = self.plan_variable_by_name(ctx, name)? else { + return Ok(None); + }; + let module_path = self + .module_path_for_id(ctx.module) + .ok_or_else(|| anyhow::anyhow!("Semantic module id {:?} is not loaded", ctx.module))? + .to_path_buf(); + + self.plan_access_path_with_type_completion(&module_path, plan, path) + .map(Some) + } + + /// Get all variables visible at the given module address as semantic views. + /// + /// # Arguments + /// * `module_address` - Module address containing both module path and address offset + pub fn get_all_variables_at_address( + &self, + module_address: &ModuleAddress, + ) -> Result> { + tracing::info!( + "Looking up variables at address 0x{:x} in module {}", + module_address.address, + module_address.module_display() + ); + let ctx = self.resolve_pc(module_address)?; + self.visible_variables(&ctx) + } + + /// Plan a chain access (e.g., r.headers_in) as a neutral read plan. + pub fn plan_chain_access_read_plan( + &self, + module_address: &ModuleAddress, + base_var: &str, + chain: &[String], + ) -> Result> { + let ctx = self.resolve_pc(module_address)?; + let path = VariableAccessPath::fields(chain.iter().cloned()); + self.plan_variable_access_by_name(&ctx, base_var, &path) + } +} diff --git a/ghostscope-dwarf/src/analyzer/type_lookup.rs b/ghostscope-dwarf/src/analyzer/type_lookup.rs new file mode 100644 index 00000000..a059b1d8 --- /dev/null +++ b/ghostscope-dwarf/src/analyzer/type_lookup.rs @@ -0,0 +1,191 @@ +use super::DwarfAnalyzer; +use crate::semantics::VariableReadPlan; +use std::path::Path; + +impl DwarfAnalyzer { + fn resolve_type_shallow_by_name_in_module_with_tags>( + &self, + module_path: P, + name: &str, + tags: &[gimli::DwTag], + ) -> Option { + let path_buf = module_path.as_ref().to_path_buf(); + self.modules + .get(&path_buf) + .and_then(|module_data| module_data.resolve_type_shallow_by_name_with_tags(name, tags)) + } + + fn resolve_type_shallow_by_name_with_tags( + &self, + name: &str, + tags: &[gimli::DwTag], + ) -> Option { + self.modules + .values() + .find_map(|module_data| module_data.resolve_type_shallow_by_name_with_tags(name, tags)) + } + + pub(super) fn complete_unknown_pointer_target_type( + &self, + module_path: &Path, + plan: &mut VariableReadPlan, + pointer_type_name: &str, + ) { + let Some(dwarf_type) = plan.dwarf_type.clone() else { + return; + }; + + let (unknown_name, pointer_size) = match dwarf_type { + crate::TypeInfo::UnknownType { name } => (name, None), + crate::TypeInfo::PointerType { target_type, size } => { + let crate::TypeInfo::UnknownType { name } = *target_type else { + return; + }; + (name, Some(size)) + } + _ => return, + }; + + let mut candidate_names = Vec::new(); + if !unknown_name.is_empty() && unknown_name != "void" { + candidate_names.push(unknown_name); + } + if candidate_names.is_empty() { + if let Some(index) = pointer_type_name.find('*') { + let mut base = pointer_type_name[..index].trim().to_string(); + for prefix in [ + "const ", + "volatile ", + "restrict ", + "struct ", + "class ", + "union ", + ] { + if base.starts_with(prefix) { + base = base[prefix.len()..].trim().to_string(); + } + } + if !base.is_empty() && base != "void" { + candidate_names.push(base); + } + } + } + + for candidate in candidate_names { + let Some(upgraded) = self.resolve_shallow_named_pointer_target(module_path, &candidate) + else { + continue; + }; + + let upgraded = Self::named_type(candidate, upgraded); + plan.dwarf_type = Some(if let Some(size) = pointer_size { + crate::TypeInfo::PointerType { + target_type: Box::new(upgraded), + size, + } + } else { + upgraded + }); + if let Some(dwarf_type) = plan.dwarf_type.as_ref() { + plan.type_name = dwarf_type.type_name(); + } + return; + } + } + + fn named_type(name: String, ty: crate::TypeInfo) -> crate::TypeInfo { + match ty { + crate::TypeInfo::StructType { .. } + | crate::TypeInfo::UnionType { .. } + | crate::TypeInfo::EnumType { .. } => crate::TypeInfo::TypedefType { + name, + underlying_type: Box::new(ty), + }, + _ => ty, + } + } + + fn resolve_shallow_named_pointer_target( + &self, + module_path: &Path, + name: &str, + ) -> Option { + [ + self.resolve_struct_type_shallow_by_name(name), + self.resolve_struct_type_shallow_by_name_in_module(module_path, name), + self.resolve_union_type_shallow_by_name(name), + self.resolve_union_type_shallow_by_name_in_module(module_path, name), + self.resolve_enum_type_shallow_by_name(name), + self.resolve_enum_type_shallow_by_name_in_module(module_path, name), + ] + .into_iter() + .flatten() + .find(|ty| ty.size() > 0) + } + + /// Resolve struct/class by name (shallow) in a specific module using only indexes + pub fn resolve_struct_type_shallow_by_name_in_module>( + &self, + module_path: P, + name: &str, + ) -> Option { + self.resolve_type_shallow_by_name_in_module_with_tags( + module_path, + name, + &[ + gimli::constants::DW_TAG_structure_type, + gimli::constants::DW_TAG_class_type, + ], + ) + } + + /// Resolve struct/class by name (shallow) across modules (first match) + pub fn resolve_struct_type_shallow_by_name(&self, name: &str) -> Option { + self.resolve_type_shallow_by_name_with_tags( + name, + &[ + gimli::constants::DW_TAG_structure_type, + gimli::constants::DW_TAG_class_type, + ], + ) + } + + /// Resolve union by name (shallow) in a specific module + pub fn resolve_union_type_shallow_by_name_in_module>( + &self, + module_path: P, + name: &str, + ) -> Option { + self.resolve_type_shallow_by_name_in_module_with_tags( + module_path, + name, + &[gimli::constants::DW_TAG_union_type], + ) + } + + /// Resolve union by name (shallow) across modules (first match) + pub fn resolve_union_type_shallow_by_name(&self, name: &str) -> Option { + self.resolve_type_shallow_by_name_with_tags(name, &[gimli::constants::DW_TAG_union_type]) + } + + /// Resolve enum by name (shallow) in a specific module + pub fn resolve_enum_type_shallow_by_name_in_module>( + &self, + module_path: P, + name: &str, + ) -> Option { + self.resolve_type_shallow_by_name_in_module_with_tags( + module_path, + name, + &[gimli::constants::DW_TAG_enumeration_type], + ) + } + + /// Resolve enum by name (shallow) across modules (first match) + pub fn resolve_enum_type_shallow_by_name(&self, name: &str) -> Option { + self.resolve_type_shallow_by_name_with_tags( + name, + &[gimli::constants::DW_TAG_enumeration_type], + ) + } +} diff --git a/ghostscope-dwarf/src/core/diagnostic.rs b/ghostscope-dwarf/src/core/diagnostic.rs new file mode 100644 index 00000000..b88f532f --- /dev/null +++ b/ghostscope-dwarf/src/core/diagnostic.rs @@ -0,0 +1,187 @@ +//! Precise semantic availability and diagnostic categories. + +use crate::core::EvaluationResult; + +/// Whether a semantic result is usable at the requested PC. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Availability { + Available, + PartiallyAvailable, + OptimizedOut, + NotInScope, + Unsupported(UnsupportedReason), + Requires(RuntimeRequirement), + Ambiguous(AmbiguityReason), +} + +impl Availability { + pub fn is_available(&self) -> bool { + matches!(self, Self::Available | Self::PartiallyAvailable) + } + + pub fn from_evaluation_result(result: &EvaluationResult) -> Self { + match result { + EvaluationResult::Optimized => Self::OptimizedOut, + EvaluationResult::Composite(pieces) => { + if pieces.is_empty() { + Self::Available + } else if pieces + .iter() + .all(|piece| matches!(piece.location, EvaluationResult::Optimized)) + { + Self::OptimizedOut + } else if pieces + .iter() + .any(|piece| matches!(piece.location, EvaluationResult::Optimized)) + { + Self::PartiallyAvailable + } else { + Self::Available + } + } + _ => Self::Available, + } + } +} + +impl From<&EvaluationResult> for Availability { + fn from(value: &EvaluationResult) -> Self { + Self::from_evaluation_result(value) + } +} + +/// DWARF or semantic shapes the current engine cannot represent yet. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum UnsupportedReason { + DwarfOp { op: String }, + ExpressionShape { detail: String }, + TypeLayout { detail: String }, + AddressClass { detail: String }, + RegisterMapping { dwarf_reg: u16 }, +} + +/// Runtime feature required before a semantic plan can be lowered safely. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RuntimeRequirement { + CallerFrame, + SleepableUprobe, + UserMemoryRead, + DwarfCfiRecovery, +} + +/// Reason a query could not pick one unambiguous semantic interpretation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AmbiguityReason { + InlineContext { detail: String }, + VariableDeclaration { detail: String }, + TypeResolution { detail: String }, +} + +/// Where a semantic answer came from. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Provenance { + DirectDie, + AbstractOrigin, + Specification, + LocationList, + CallSite, + Cfi, + Synthesized { detail: String }, +} + +/// Capabilities available to a future BPF lowering pass. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuntimeCapabilities { + pub regular_uprobe: bool, + pub sleepable_uprobe: bool, + pub uprobe_multi: bool, + pub copy_from_user_task: bool, + pub max_bpf_stack_bytes: usize, + pub bounded_loops: bool, + pub arch: TargetArch, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TargetArch { + X86_64, + Aarch64, + Unknown, +} + +impl TargetArch { + pub fn current() -> Self { + if cfg!(target_arch = "x86_64") { + Self::X86_64 + } else if cfg!(target_arch = "aarch64") { + Self::Aarch64 + } else { + Self::Unknown + } + } +} + +impl Default for RuntimeCapabilities { + fn default() -> Self { + Self { + regular_uprobe: true, + sleepable_uprobe: false, + uprobe_multi: false, + copy_from_user_task: false, + max_bpf_stack_bytes: 512, + bounded_loops: true, + arch: TargetArch::current(), + } + } +} + +/// User-memory helper strategy selected by a lowering plan. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum HelperMode { + NoUserMemoryRead, + ProbeReadUser, + CopyFromUserTask, +} + +/// Coarse verifier risk surfaced before backend codegen. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VerifierRisk { + Low, + RequiresBoundedLoops, + StackBudgetExceeded { estimated: usize, max: usize }, + Unsupported { reason: String }, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{DirectValueResult, PieceResult}; + + #[test] + fn optimized_result_is_unavailable() { + assert_eq!( + Availability::from(&EvaluationResult::Optimized), + Availability::OptimizedOut + ); + } + + #[test] + fn mixed_composite_result_is_partially_available() { + let result = EvaluationResult::Composite(vec![ + PieceResult { + location: EvaluationResult::DirectValue(DirectValueResult::RegisterValue(0)), + size: 4, + bit_offset: None, + }, + PieceResult { + location: EvaluationResult::Optimized, + size: 4, + bit_offset: Some(32), + }, + ]); + + assert_eq!( + Availability::from(&result), + Availability::PartiallyAvailable + ); + } +} diff --git a/ghostscope-dwarf/src/core/evaluation.rs b/ghostscope-dwarf/src/core/evaluation.rs index 784754ec..6d8f8b8b 100644 --- a/ghostscope-dwarf/src/core/evaluation.rs +++ b/ghostscope-dwarf/src/core/evaluation.rs @@ -1,18 +1,18 @@ -//! DWARF expression evaluation results for LLVM/eBPF code generation +//! DWARF expression evaluation results //! -//! This module defines the simplified representation of DWARF expressions -//! that can be directly converted to LLVM IR for eBPF code generation. +//! This module defines the internal representation produced while evaluating +//! raw DWARF expressions. The semantic planning layer converts these results +//! into public read plans before compiler code generation consumes them. //! //! Design principles: -//! 1. Optimize for eBPF constraints (read registers from pt_regs, read memory via bpf_probe_read_user) +//! 1. Preserve whether an expression describes a value or a location //! 2. Pre-compute as much as possible at compile time -//! 3. Clearly separate value semantics from location semantics -//! 4. Make register dependencies explicit for eBPF verification +//! 3. Keep register and memory dependencies explicit for later lowering use std::collections::BTreeMap; use std::fmt; -/// Result of evaluating a DWARF expression for eBPF code generation +/// Internal result of evaluating a DWARF expression #[derive(Debug, Clone, PartialEq)] pub enum EvaluationResult { /// Direct value - expression result is the variable value (no memory read needed) @@ -121,8 +121,10 @@ pub struct EntryValueCase { pub value_steps: Vec, } -/// Computation step for LLVM IR generation -/// These map directly to LLVM IR operations that can be generated in eBPF +/// Stack-machine computation step preserved for later runtime lowering. +/// +/// The compiler currently lowers these steps to LLVM IR for eBPF, but the DWARF +/// crate treats them as target-independent semantic operations. #[derive(Debug, Clone, PartialEq)] pub enum ComputeStep { /// Load register value from pt_regs diff --git a/ghostscope-dwarf/src/core/ids.rs b/ghostscope-dwarf/src/core/ids.rs new file mode 100644 index 00000000..ee1305d0 --- /dev/null +++ b/ghostscope-dwarf/src/core/ids.rs @@ -0,0 +1,50 @@ +//! Stable semantic identifiers used by higher-level DWARF queries. + +/// Stable identifier for a loaded module within one `DwarfAnalyzer`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ModuleId(pub u32); + +/// Stable identifier for a compilation unit within a loaded module. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct CuId(pub u32); + +/// Stable reference to a DIE within a loaded module. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct DieRef { + pub module: ModuleId, + pub cu: CuId, + /// Unit-relative or absolute offset normalized by the producer of the id. + pub offset: u64, +} + +/// Stable identifier for a type DIE. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct TypeId { + pub module: ModuleId, + pub cu: CuId, + pub die: DieRef, +} + +/// Stable identifier for a variable declaration DIE. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct VariableId { + pub declaration: DieRef, +} + +/// Stable identifier for a function DIE. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct FunctionId { + pub declaration: DieRef, +} + +/// Stable identifier for a lexical scope DIE. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ScopeId { + pub die: DieRef, +} + +/// Stable identifier for an inline context inside a concrete function. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct InlineContextId { + pub die: DieRef, +} diff --git a/ghostscope-dwarf/src/core/mod.rs b/ghostscope-dwarf/src/core/mod.rs index 7f9126b0..9094de9b 100644 --- a/ghostscope-dwarf/src/core/mod.rs +++ b/ghostscope-dwarf/src/core/mod.rs @@ -3,16 +3,21 @@ use crate::binary::DwarfReader; pub mod demangle; +pub mod diagnostic; pub mod errors; pub mod evaluation; +pub mod ids; pub mod mapping; +pub mod plan; pub mod symbol_names; pub mod types; pub use demangle::*; +pub use diagnostic::*; pub use errors::*; pub use evaluation::*; -pub use mapping::*; +pub use ids::*; +pub use plan::*; pub(crate) use symbol_names::*; pub use types::*; diff --git a/ghostscope-dwarf/src/core/plan.rs b/ghostscope-dwarf/src/core/plan.rs new file mode 100644 index 00000000..08ce1d74 --- /dev/null +++ b/ghostscope-dwarf/src/core/plan.rs @@ -0,0 +1,242 @@ +//! Neutral semantic plans produced before runtime-specific lowering. + +use crate::core::{ + ComputeStep, DirectValueResult, EvaluationResult, LocationResult, MemoryAccessSize, +}; +use std::fmt; + +/// Address expression that can be evaluated by a later lowering layer. +#[derive(Debug, Clone, PartialEq)] +pub struct AddressExpr { + pub steps: Vec, +} + +impl AddressExpr { + pub fn constant(address: u64) -> Self { + Self { + steps: vec![ComputeStep::PushConstant(address as i64)], + } + } + + pub fn register_relative(dwarf_reg: u16, offset: i64) -> Self { + let mut steps = vec![ComputeStep::LoadRegister(dwarf_reg)]; + if offset != 0 { + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + } + Self { steps } + } +} + +/// PC-sensitive variable location before BPF lowering. +#[derive(Debug, Clone, PartialEq)] +pub enum VariableLocation { + Address(AddressExpr), + AbsoluteAddressValue(AddressExpr), + RegisterValue { dwarf_reg: u16 }, + RegisterAddress { dwarf_reg: u16, offset: i64 }, + FrameBaseRelative { offset: i64 }, + ComputedValue(Vec), + ComputedAddress(Vec), + ImplicitValue(Vec), + Pieces(Vec), + OptimizedOut, + Unknown, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PieceLocation { + pub bit_offset: u32, + pub bit_size: u32, + pub location: Box, +} + +impl VariableLocation { + pub fn from_evaluation_result(result: &EvaluationResult) -> Self { + match result { + EvaluationResult::DirectValue(direct) => Self::from_direct_value(direct), + EvaluationResult::MemoryLocation(location) => Self::from_location_result(location), + EvaluationResult::Optimized => Self::OptimizedOut, + EvaluationResult::Composite(pieces) => Self::Pieces( + pieces + .iter() + .map(|piece| PieceLocation { + bit_offset: piece.bit_offset.unwrap_or(0).min(u32::MAX as u64) as u32, + bit_size: piece.size.saturating_mul(8).min(u32::MAX as u64) as u32, + location: Box::new(Self::from_evaluation_result(&piece.location)), + }) + .collect(), + ), + } + } + + fn from_direct_value(value: &DirectValueResult) -> Self { + match value { + DirectValueResult::Constant(value) => { + Self::ComputedValue(vec![ComputeStep::PushConstant(*value)]) + } + DirectValueResult::AbsoluteAddress(address) => { + Self::AbsoluteAddressValue(AddressExpr::constant(*address)) + } + DirectValueResult::ImplicitValue(bytes) => Self::ImplicitValue(bytes.clone()), + DirectValueResult::RegisterValue(dwarf_reg) => Self::RegisterValue { + dwarf_reg: *dwarf_reg, + }, + DirectValueResult::ComputedValue { steps, .. } => Self::ComputedValue(steps.clone()), + } + } + + fn from_location_result(location: &LocationResult) -> Self { + match location { + LocationResult::Address(address) => Self::Address(AddressExpr::constant(*address)), + LocationResult::RegisterAddress { + register, offset, .. + } => Self::RegisterAddress { + dwarf_reg: *register, + offset: offset.unwrap_or(0), + }, + LocationResult::ComputedLocation { steps } => Self::ComputedAddress(steps.clone()), + } + } +} + +impl From<&EvaluationResult> for VariableLocation { + fn from(value: &EvaluationResult) -> Self { + Self::from_evaluation_result(value) + } +} + +impl fmt::Display for VariableLocation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + VariableLocation::Address(expr) => { + write!(f, "[Memory] {}", location_display_for_address_expr(expr)) + } + VariableLocation::AbsoluteAddressValue(expr) => { + write!(f, "[DirectValue] ")?; + if let [ComputeStep::PushConstant(address)] = expr.steps.as_slice() { + DirectValueResult::AbsoluteAddress(*address as u64).fmt(f) + } else { + DirectValueResult::ComputedValue { + steps: expr.steps.clone(), + result_size: MemoryAccessSize::U64, + } + .fmt(f) + } + } + VariableLocation::RegisterValue { dwarf_reg } => { + write!(f, "[DirectValue] ")?; + DirectValueResult::RegisterValue(*dwarf_reg).fmt(f) + } + VariableLocation::RegisterAddress { dwarf_reg, offset } => { + write!(f, "[Memory] ")?; + LocationResult::RegisterAddress { + register: *dwarf_reg, + offset: Some(*offset), + size: None, + } + .fmt(f) + } + VariableLocation::FrameBaseRelative { offset } => { + if *offset >= 0 { + write!(f, "[Memory] @[frame_base+{offset}]") + } else { + write!(f, "[Memory] @[frame_base{offset}]") + } + } + VariableLocation::ComputedValue(steps) => { + write!(f, "[DirectValue] ")?; + DirectValueResult::ComputedValue { + steps: steps.clone(), + result_size: MemoryAccessSize::U64, + } + .fmt(f) + } + VariableLocation::ComputedAddress(steps) => { + write!(f, "[Memory] ")?; + LocationResult::ComputedLocation { + steps: steps.clone(), + } + .fmt(f) + } + VariableLocation::ImplicitValue(bytes) => { + write!(f, "[DirectValue] ")?; + DirectValueResult::ImplicitValue(bytes.clone()).fmt(f) + } + VariableLocation::Pieces(pieces) => write!(f, "Composite[{} pieces]", pieces.len()), + VariableLocation::OptimizedOut => write!(f, ""), + VariableLocation::Unknown => write!(f, ""), + } + } +} + +fn location_display_for_address_expr(expr: &AddressExpr) -> String { + if let [ComputeStep::PushConstant(address)] = expr.steps.as_slice() { + return format!("{}", LocationResult::Address(*address as u64)); + } + + format!( + "{}", + LocationResult::ComputedLocation { + steps: expr.steps.clone() + } + ) +} + +/// A memory read requested by a future runtime lowering pass. +#[derive(Debug, Clone, PartialEq)] +pub struct UserMemoryRead { + pub address: AddressExpr, + pub size: MemoryAccessSize, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{DirectValueResult, PieceResult}; + + #[test] + fn converts_register_address_location() { + let result = EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { + register: 6, + offset: Some(-16), + size: None, + }); + + assert_eq!( + VariableLocation::from(&result), + VariableLocation::RegisterAddress { + dwarf_reg: 6, + offset: -16 + } + ); + } + + #[test] + fn converts_absolute_address_value_as_rebasable_value() { + let result = EvaluationResult::DirectValue(DirectValueResult::AbsoluteAddress(0x1234)); + + assert_eq!( + VariableLocation::from(&result), + VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x1234)) + ); + } + + #[test] + fn converts_composite_pieces() { + let result = EvaluationResult::Composite(vec![PieceResult { + location: EvaluationResult::DirectValue(DirectValueResult::RegisterValue(0)), + size: 4, + bit_offset: Some(32), + }]); + + assert_eq!( + VariableLocation::from(&result), + VariableLocation::Pieces(vec![PieceLocation { + bit_offset: 32, + bit_size: 32, + location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), + }]) + ); + } +} diff --git a/ghostscope-dwarf/src/core/types.rs b/ghostscope-dwarf/src/core/types.rs index 3ea307f4..13df1b65 100644 --- a/ghostscope-dwarf/src/core/types.rs +++ b/ghostscope-dwarf/src/core/types.rs @@ -55,14 +55,6 @@ pub struct FunctionInfo { pub line_number: Option, } -/// Line information from debug_line (address-based) -#[derive(Debug, Clone)] -pub struct LineInfo { - pub line_number: u32, - pub file_path: String, - pub address: u64, -} - /// Cooked index entry - inspired by GDB's cooked_index_entry /// Extremely lightweight startup index, minimal memory footprint #[derive(Debug, Clone)] @@ -104,11 +96,6 @@ impl IndexEntry { self.function_kind() == FunctionDieKind::InlineInstance } - /// True when this DIE is a concrete, addressable subprogram body. - pub fn is_concrete_subprogram(&self) -> bool { - self.function_kind() == FunctionDieKind::ConcreteSubprogram - } - /// Return entry_pc when it is usable as this DIE's own entry address. /// /// Most DIEs with an entry_pc also carry ranges, and some producers emit @@ -168,8 +155,6 @@ pub struct LineEntry { pub column: u64, pub is_stmt: bool, pub prologue_end: bool, - pub epilogue_begin: bool, - pub end_sequence: bool, } /// Program section classification for global/static variables @@ -194,6 +179,3 @@ pub struct GlobalVariableInfo { pub die_offset: gimli::UnitOffset, pub unit_offset: gimli::DebugInfoOffset, } - -/// Re-export SectionOffsets from coordinator to keep a single definition/source of truth -pub use ghostscope_process::SectionOffsets; diff --git a/ghostscope-dwarf/src/dwarf_expr/lower.rs b/ghostscope-dwarf/src/dwarf_expr/lower.rs index 6406e181..164e11cb 100644 --- a/ghostscope-dwarf/src/dwarf_expr/lower.rs +++ b/ghostscope-dwarf/src/dwarf_expr/lower.rs @@ -1,6 +1,7 @@ //! DWARF expression evaluator //! -//! Converts DWARF location expressions to EvaluationResult for eBPF code generation +//! Converts raw DWARF location expressions into the crate's internal evaluator +//! representation before semantic planning lowers them into read plans. use crate::binary::{DwarfEndian, DwarfReader}; use crate::core::{ @@ -494,11 +495,11 @@ impl ExpressionEvaluator { // This marks the result as a computed value, not a memory location // Already handled by has_stack_value flag } - ParsedOperation::Operation(Operation::Deref { size, space, .. }) => { + ParsedOperation::Operation(op @ Operation::Deref { size, space, .. }) => { if *space { - return Err(anyhow::anyhow!( - "unsupported DWARF expression operation: {:?}", - op + return Err(crate::dwarf_expr::ops::unsupported_operation_error( + "DWARF expression", + op, )); } let mem_size = match size { @@ -517,10 +518,10 @@ impl ExpressionEvaluator { steps.push(ComputeStep::Dereference { size: mem_size }); } ParsedOperation::Operation(Operation::Nop) => {} - _ => { - return Err(anyhow::anyhow!( - "unsupported DWARF expression operation: {:?}", - op + ParsedOperation::Operation(op) => { + return Err(crate::dwarf_expr::ops::unsupported_operation_error( + "DWARF expression", + op, )); } } @@ -737,15 +738,23 @@ impl ExpressionEvaluator { } // These operations don't make sense as single operations - Operation::StackValue => Err(anyhow::anyhow!( - "unsupported single operation: DW_OP_stack_value" - )), - Operation::PlusConstant { .. } => Err(anyhow::anyhow!( - "unsupported single operation: DW_OP_plus_uconst without base" - )), - _ => Err(anyhow::anyhow!( - "unsupported single operation in fast path: {:?}", - op + Operation::StackValue => Err( + crate::dwarf_expr::ops::unsupported_operation_error_with_detail( + "single DWARF expression", + op, + "DW_OP_stack_value cannot be a standalone location expression", + ), + ), + Operation::PlusConstant { .. } => Err( + crate::dwarf_expr::ops::unsupported_operation_error_with_detail( + "single DWARF expression", + op, + "DW_OP_plus_uconst requires a base value", + ), + ), + _ => Err(crate::dwarf_expr::ops::unsupported_operation_error( + "single DWARF expression", + op, )), } } @@ -1274,7 +1283,10 @@ impl ExpressionEvaluator { #[cfg(test)] mod tests { use super::ExpressionEvaluator; - use crate::core::{CfaResult, DirectValueResult, EvaluationResult, LocationResult}; + use crate::core::{ + CfaResult, ComputeStep, DirectValueResult, EvaluationResult, LocationResult, + MemoryAccessSize, + }; use gimli::constants; use gimli::RunTimeEndian; @@ -1286,6 +1298,261 @@ mod tests { } } + fn encode_uleb(mut value: u64) -> Vec { + let mut out = Vec::new(); + loop { + let mut byte = (value & 0x7f) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + out.push(byte); + if value == 0 { + break; + } + } + out + } + + fn encode_sleb(mut value: i64) -> Vec { + let mut out = Vec::new(); + loop { + let mut byte = (value as u8) & 0x7f; + value >>= 7; + let sign_bit_set = (byte & 0x40) != 0; + let done = (value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set); + if !done { + byte |= 0x80; + } + out.push(byte); + if done { + break; + } + } + out + } + + fn parse_test_expr(bytes: &[u8]) -> anyhow::Result { + ExpressionEvaluator::parse_expression_with_context( + bytes, + RunTimeEndian::Little, + test_encoding(), + None, + 0, + None, + None, + None, + 0, + ) + } + + fn addr_expr(address: u64) -> Vec { + let mut bytes = vec![constants::DW_OP_addr.0]; + bytes.extend(address.to_le_bytes()); + bytes + } + + fn regx_expr(register: u64) -> Vec { + let mut bytes = vec![constants::DW_OP_regx.0]; + bytes.extend(encode_uleb(register)); + bytes + } + + fn bregx_expr(register: u64, offset: i64) -> Vec { + let mut bytes = vec![constants::DW_OP_bregx.0]; + bytes.extend(encode_uleb(register)); + bytes.extend(encode_sleb(offset)); + bytes + } + + #[test] + fn dwarf_op_supported_coverage_matrix() { + let cases = vec![ + ( + "DW_OP_regN", + vec![constants::DW_OP_reg5.0], + EvaluationResult::DirectValue(DirectValueResult::RegisterValue(5)), + ), + ( + "DW_OP_regx", + regx_expr(33), + EvaluationResult::DirectValue(DirectValueResult::RegisterValue(33)), + ), + ( + "DW_OP_bregN", + { + let mut bytes = vec![constants::DW_OP_breg7.0]; + bytes.extend(encode_sleb(8)); + bytes + }, + EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { + register: 7, + offset: Some(8), + size: None, + }), + ), + ( + "DW_OP_bregx", + bregx_expr(33, -2), + EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { + register: 33, + offset: Some(-2), + size: None, + }), + ), + ( + "DW_OP_addr", + addr_expr(0x1234), + EvaluationResult::MemoryLocation(LocationResult::Address(0x1234)), + ), + ( + "DW_OP_stack_value", + vec![constants::DW_OP_lit1.0, constants::DW_OP_stack_value.0], + EvaluationResult::DirectValue(DirectValueResult::Constant(1)), + ), + ( + "arithmetic stack value subset", + vec![ + constants::DW_OP_lit1.0, + constants::DW_OP_lit2.0, + constants::DW_OP_plus.0, + constants::DW_OP_stack_value.0, + ], + EvaluationResult::DirectValue(DirectValueResult::ComputedValue { + steps: vec![ + ComputeStep::PushConstant(1), + ComputeStep::PushConstant(2), + ComputeStep::Add, + ], + result_size: MemoryAccessSize::U64, + }), + ), + ( + "DW_OP_implicit_value", + vec![constants::DW_OP_implicit_value.0, 3, 0xaa, 0xbb, 0xcc], + EvaluationResult::DirectValue(DirectValueResult::ImplicitValue(vec![ + 0xaa, 0xbb, 0xcc, + ])), + ), + ]; + + for (name, bytes, expected) in cases { + let result = parse_test_expr(&bytes).unwrap_or_else(|error| { + panic!("{name} should parse successfully, bytes={bytes:?}: {error}") + }); + assert_eq!(result, expected, "{name} lowered incorrectly"); + } + } + + #[test] + fn dwarf_op_fbreg_coverage_uses_cfa_provider() { + let get_cfa = |_address| { + Ok(Some(CfaResult::RegisterPlusOffset { + register: 7, + offset: 16, + })) + }; + let mut expr = vec![constants::DW_OP_fbreg.0]; + expr.extend(encode_sleb(4)); + + let result = ExpressionEvaluator::parse_expression_with_context( + &expr, + RunTimeEndian::Little, + test_encoding(), + None, + 0, + Some(&get_cfa), + None, + None, + 0, + ) + .expect("DW_OP_fbreg should parse with a CFA provider"); + + assert_eq!( + result, + EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { + register: 7, + offset: Some(20), + size: None, + }) + ); + } + + #[test] + fn dwarf_op_unsupported_diagnostic_matrix_names_ops() { + let cases = vec![ + ( + "DW_OP_drop", + vec![ + constants::DW_OP_lit1.0, + constants::DW_OP_drop.0, + constants::DW_OP_stack_value.0, + ], + "DW_OP_drop", + ), + ( + "DW_OP_piece", + { + let mut bytes = vec![constants::DW_OP_lit1.0, constants::DW_OP_piece.0]; + bytes.extend(encode_uleb(1)); + bytes + }, + "DW_OP_piece", + ), + ( + "DW_OP_bit_piece", + { + let mut bytes = vec![constants::DW_OP_lit1.0, constants::DW_OP_bit_piece.0]; + bytes.extend(encode_uleb(8)); + bytes.extend(encode_uleb(0)); + bytes + }, + "DW_OP_bit_piece", + ), + ( + "DW_OP_addrx", + { + let mut bytes = vec![constants::DW_OP_addrx.0]; + bytes.extend(encode_uleb(0)); + bytes + }, + "DW_OP_addrx", + ), + ( + "DW_OP_bra", + vec![ + constants::DW_OP_lit1.0, + constants::DW_OP_bra.0, + 0, + 0, + constants::DW_OP_stack_value.0, + ], + "DW_OP_bra", + ), + ]; + + for (name, bytes, expected_op) in cases { + let error = match parse_test_expr(&bytes) { + Ok(result) => panic!("{name} should be unsupported, got {result:?}"), + Err(error) => error, + }; + let message = error.to_string(); + assert!( + message.contains(expected_op), + "{name} diagnostic should mention {expected_op}, got: {message}" + ); + assert!( + message.contains("unsupported"), + "{name} diagnostic should be explicit, got: {message}" + ); + assert_eq!( + crate::dwarf_expr::ops::unsupported_op_from_error(&error), + Some(expected_op), + "{name} diagnostic should carry a typed unsupported-op cause" + ); + } + } + #[test] fn implicit_pointer_to_static_storage_preserves_absolute_address_semantics() { let result = ExpressionEvaluator::addressable_location_to_pointer_value( diff --git a/ghostscope-dwarf/src/dwarf_expr/ops.rs b/ghostscope-dwarf/src/dwarf_expr/ops.rs index c46970be..b4758943 100644 --- a/ghostscope-dwarf/src/dwarf_expr/ops.rs +++ b/ghostscope-dwarf/src/dwarf_expr/ops.rs @@ -3,6 +3,20 @@ use crate::core::Result; use gimli::{Operation, Reader}; +#[derive(Debug, thiserror::Error)] +#[error("unsupported {context} operation {op}: {detail}")] +pub(crate) struct UnsupportedDwarfOpError { + context: String, + op: &'static str, + detail: String, +} + +impl UnsupportedDwarfOpError { + pub(crate) fn op(&self) -> &'static str { + self.op + } +} + pub(crate) fn parse_ops( mut reader: R, encoding: gimli::Encoding, @@ -60,3 +74,103 @@ where } Ok(false) } + +pub(crate) fn operation_name(op: &Operation) -> &'static str +where + R: Reader, +{ + match op { + Operation::Deref { space: true, .. } => "DW_OP_xderef*", + Operation::Deref { size: 0, .. } => "DW_OP_deref", + Operation::Deref { .. } => "DW_OP_deref_size", + Operation::Drop => "DW_OP_drop", + Operation::Pick { index: 0 } => "DW_OP_dup", + Operation::Pick { index: 1 } => "DW_OP_over", + Operation::Pick { .. } => "DW_OP_pick", + Operation::Swap => "DW_OP_swap", + Operation::Rot => "DW_OP_rot", + Operation::Abs => "DW_OP_abs", + Operation::And => "DW_OP_and", + Operation::Div => "DW_OP_div", + Operation::Minus => "DW_OP_minus", + Operation::Mod => "DW_OP_mod", + Operation::Mul => "DW_OP_mul", + Operation::Neg => "DW_OP_neg", + Operation::Not => "DW_OP_not", + Operation::Or => "DW_OP_or", + Operation::Plus => "DW_OP_plus", + Operation::PlusConstant { .. } => "DW_OP_plus_uconst", + Operation::Shl => "DW_OP_shl", + Operation::Shr => "DW_OP_shr", + Operation::Shra => "DW_OP_shra", + Operation::Xor => "DW_OP_xor", + Operation::Bra { .. } => "DW_OP_bra", + Operation::Eq => "DW_OP_eq", + Operation::Ge => "DW_OP_ge", + Operation::Gt => "DW_OP_gt", + Operation::Le => "DW_OP_le", + Operation::Lt => "DW_OP_lt", + Operation::Ne => "DW_OP_ne", + Operation::Skip { .. } => "DW_OP_skip", + Operation::UnsignedConstant { .. } => "DW_OP_lit*/DW_OP_const*u", + Operation::SignedConstant { .. } => "DW_OP_const*s", + Operation::Register { .. } => "DW_OP_reg*/DW_OP_regx", + Operation::RegisterOffset { .. } => "DW_OP_breg*/DW_OP_bregx", + Operation::FrameOffset { .. } => "DW_OP_fbreg", + Operation::Nop => "DW_OP_nop", + Operation::PushObjectAddress => "DW_OP_push_object_address", + Operation::Call { .. } => "DW_OP_call*", + Operation::VariableValue { .. } => "DW_OP_GNU_variable_value", + Operation::TLS => "DW_OP_form_tls_address", + Operation::CallFrameCFA => "DW_OP_call_frame_cfa", + Operation::Piece { + bit_offset: Some(_), + .. + } => "DW_OP_bit_piece", + Operation::Piece { .. } => "DW_OP_piece", + Operation::ImplicitValue { .. } => "DW_OP_implicit_value", + Operation::StackValue => "DW_OP_stack_value", + Operation::ImplicitPointer { .. } => "DW_OP_implicit_pointer", + Operation::EntryValue { .. } => "DW_OP_entry_value", + Operation::ParameterRef { .. } => "DW_OP_GNU_parameter_ref", + Operation::Address { .. } => "DW_OP_addr", + Operation::AddressIndex { .. } => "DW_OP_addrx", + Operation::ConstantIndex { .. } => "DW_OP_constx", + Operation::TypedLiteral { .. } => "DW_OP_const_type", + Operation::Convert { .. } => "DW_OP_convert", + Operation::Reinterpret { .. } => "DW_OP_reinterpret", + Operation::Uninitialized => "DW_OP_GNU_uninit", + Operation::WasmLocal { .. } => "DW_OP_WASM_location(local)", + Operation::WasmGlobal { .. } => "DW_OP_WASM_location(global)", + Operation::WasmStack { .. } => "DW_OP_WASM_location(stack)", + } +} + +pub(crate) fn unsupported_operation_error(context: &str, op: &Operation) -> anyhow::Error +where + R: Reader, +{ + unsupported_operation_error_with_detail(context, op, format!("{op:?}")) +} + +pub(crate) fn unsupported_operation_error_with_detail( + context: &str, + op: &Operation, + detail: impl Into, +) -> anyhow::Error +where + R: Reader, +{ + UnsupportedDwarfOpError { + context: context.to_string(), + op: operation_name(op), + detail: detail.into(), + } + .into() +} + +pub(crate) fn unsupported_op_from_error(error: &anyhow::Error) -> Option<&'static str> { + error + .downcast_ref::() + .map(UnsupportedDwarfOpError::op) +} diff --git a/ghostscope-dwarf/src/index/block_index.rs b/ghostscope-dwarf/src/index/block_index.rs index 3cf7805f..6c6f5a08 100644 --- a/ghostscope-dwarf/src/index/block_index.rs +++ b/ghostscope-dwarf/src/index/block_index.rs @@ -159,15 +159,21 @@ impl FunctionBlocks { best_path } - /// Enumerate all VarRefs visible at PC (root + blocks on path) - pub fn variables_at_pc(&self, pc: u64) -> Vec { + /// Enumerate all VarRefs visible at PC with their lexical path depth. + pub fn variables_at_pc_with_scope_depth(&self, pc: u64) -> Vec<(VarRef, usize)> { if !self.function_contains_pc(pc) { return Vec::new(); } let path = self.block_path_for_pc(pc); let mut out = Vec::new(); - for idx in path { - out.extend(self.nodes[idx].variables.iter().cloned()); + for (scope_depth, idx) in path.into_iter().enumerate() { + out.extend( + self.nodes[idx] + .variables + .iter() + .cloned() + .map(|variable| (variable, scope_depth)), + ); } out } diff --git a/ghostscope-dwarf/src/index/cfi_index.rs b/ghostscope-dwarf/src/index/cfi_index.rs index 87fed663..296ce425 100644 --- a/ghostscope-dwarf/src/index/cfi_index.rs +++ b/ghostscope-dwarf/src/index/cfi_index.rs @@ -5,7 +5,11 @@ use crate::{ binary::{dwarf_endian_from_object, DwarfReader, MappedFile}, - core::{CallerFrameRecovery, CfaResult, ComputeStep, Result}, + core::{CallerFrameRecovery, CfaResult, ComputeStep, ModuleId, Result}, + semantics::{ + CfaRulePlan, CompactUnwindRow, CompactUnwindTable, RegisterRecoveryPlan, UnwindDiagnostic, + UnwindDiagnosticKind, + }, }; use anyhow::{anyhow, Context}; use gimli::{ @@ -212,6 +216,32 @@ impl CfiIndex { }) } + /// Compile all FDE rows into a compact unwind table for userspace/BPF planning. + pub fn compact_unwind_table(&self, module: ModuleId) -> Result { + let mut rows = Vec::new(); + let mut diagnostics = Vec::new(); + let mut entries = self.eh_frame.entries(&self.bases); + + while let Some(entry) = entries.next().context("Failed to iterate FDE entries")? { + match entry { + CieOrFde::Fde(partial_fde) => { + let fde = partial_fde + .parse(|_, bases, offset| self.eh_frame.cie_from_offset(bases, offset)) + .context("Failed to parse FDE")?; + self.append_compact_rows(module, &fde, &mut rows, &mut diagnostics)?; + } + CieOrFde::Cie(_) => {} + } + } + + rows.sort_by_key(|row| (row.pc_start, row.pc_end)); + Ok(CompactUnwindTable { + module, + rows, + diagnostics, + }) + } + /// Find FDE for given address using eh_frame_hdr if available fn find_fde_for_address( &self, @@ -281,6 +311,237 @@ impl CfiIndex { .cloned() } + fn append_compact_rows( + &self, + module: ModuleId, + fde: &FrameDescriptionEntry, + rows: &mut Vec, + diagnostics: &mut Vec, + ) -> Result<()> { + let return_address_register = fde.cie().return_address_register().0; + let mut ctx = UnwindContext::new(); + let mut table = fde + .rows(&self.eh_frame, &self.bases, &mut ctx) + .context("Failed to build unwind rows")?; + + while let Some(row) = table.next_row().context("Failed to evaluate unwind row")? { + let pc_start = row.start_address(); + let pc_end = row.end_address(); + if pc_start >= pc_end { + continue; + } + + let cfa = self.compact_cfa_rule(row.cfa(), pc_start, pc_end, diagnostics); + let return_address = self.compact_register_rule( + row.register(Register(return_address_register)), + return_address_register, + pc_start, + pc_end, + true, + diagnostics, + ); + let sp = self.compact_optional_register_rule( + row.register(Register(7)), + 7, + pc_start, + pc_end, + diagnostics, + ); + let rbp = self.compact_optional_register_rule( + row.register(Register(6)) + .or_else(|| Self::default_register_rule(6)), + 6, + pc_start, + pc_end, + diagnostics, + ); + let bpf_supported = cfa.is_bpf_fast_path_supported() + && return_address.is_bpf_fast_path_supported() + && sp + .as_ref() + .is_none_or(RegisterRecoveryPlan::is_bpf_fast_path_supported) + && rbp + .as_ref() + .is_none_or(RegisterRecoveryPlan::is_bpf_fast_path_supported); + + rows.push(CompactUnwindRow { + module, + pc_start, + pc_end, + cfa, + return_address_register, + return_address, + sp, + rbp, + bpf_supported, + }); + } + + Ok(()) + } + + fn compact_cfa_rule( + &self, + rule: &CfaRule, + pc_start: u64, + pc_end: u64, + diagnostics: &mut Vec, + ) -> CfaRulePlan { + match rule { + CfaRule::RegisterAndOffset { register, offset } => CfaRulePlan::RegPlusOffset { + register: register.0, + offset: *offset, + }, + CfaRule::Expression(expr) => match self.parse_unwind_expression(*expr) { + Ok(steps) => { + diagnostics.push(UnwindDiagnostic { + pc_start, + pc_end, + kind: UnwindDiagnosticKind::UnsupportedCfaRule { + reason: "CFA expression requires an expression template".to_string(), + }, + }); + CfaRulePlan::Expression { steps } + } + Err(error) => { + let reason = format!("failed to parse CFA expression: {error}"); + diagnostics.push(UnwindDiagnostic { + pc_start, + pc_end, + kind: UnwindDiagnosticKind::UnsupportedCfaRule { + reason: reason.clone(), + }, + }); + CfaRulePlan::Unsupported { reason } + } + }, + } + } + + fn compact_optional_register_rule( + &self, + rule: Option>, + register: u16, + pc_start: u64, + pc_end: u64, + diagnostics: &mut Vec, + ) -> Option { + let plan = self.compact_register_rule(rule, register, pc_start, pc_end, false, diagnostics); + if matches!(plan, RegisterRecoveryPlan::Undefined) { + None + } else { + Some(plan) + } + } + + fn compact_register_rule( + &self, + rule: Option>, + register: u16, + pc_start: u64, + pc_end: u64, + required: bool, + diagnostics: &mut Vec, + ) -> RegisterRecoveryPlan { + match rule { + Some(RegisterRule::Undefined) | None => { + if required { + diagnostics.push(UnwindDiagnostic { + pc_start, + pc_end, + kind: UnwindDiagnosticKind::MissingReturnAddressRule { register }, + }); + } + RegisterRecoveryPlan::Undefined + } + Some(RegisterRule::SameValue) => RegisterRecoveryPlan::SameValue { register }, + Some(RegisterRule::Register(other)) => { + RegisterRecoveryPlan::Register { register: other.0 } + } + Some(RegisterRule::Offset(offset)) => RegisterRecoveryPlan::AtCfaOffset { offset }, + Some(RegisterRule::ValOffset(offset)) => RegisterRecoveryPlan::ValCfaOffset { offset }, + Some(RegisterRule::Constant(value)) => { + self.push_unsupported_register_diagnostic( + register, + pc_start, + pc_end, + "constant register recovery is outside the BPF fast path", + diagnostics, + ); + RegisterRecoveryPlan::Constant { value } + } + Some(RegisterRule::Expression(expr)) => { + self.expression_register_plan(register, pc_start, pc_end, expr, true, diagnostics) + } + Some(RegisterRule::ValExpression(expr)) => { + self.expression_register_plan(register, pc_start, pc_end, expr, false, diagnostics) + } + Some(RegisterRule::Architectural) => { + let reason = "architectural register recovery is unsupported".to_string(); + self.push_unsupported_register_diagnostic( + register, + pc_start, + pc_end, + &reason, + diagnostics, + ); + RegisterRecoveryPlan::Unsupported { reason } + } + } + } + + fn expression_register_plan( + &self, + register: u16, + pc_start: u64, + pc_end: u64, + expr: gimli::UnwindExpression, + dereference: bool, + diagnostics: &mut Vec, + ) -> RegisterRecoveryPlan { + match self.parse_unwind_expression(expr) { + Ok(steps) => { + self.push_unsupported_register_diagnostic( + register, + pc_start, + pc_end, + "register expression requires an expression template", + diagnostics, + ); + RegisterRecoveryPlan::Expression { steps, dereference } + } + Err(error) => { + let reason = format!("failed to parse register expression: {error}"); + self.push_unsupported_register_diagnostic( + register, + pc_start, + pc_end, + &reason, + diagnostics, + ); + RegisterRecoveryPlan::Unsupported { reason } + } + } + } + + fn push_unsupported_register_diagnostic( + &self, + register: u16, + pc_start: u64, + pc_end: u64, + reason: &str, + diagnostics: &mut Vec, + ) { + diagnostics.push(UnwindDiagnostic { + pc_start, + pc_end, + kind: UnwindDiagnosticKind::UnsupportedRegisterRule { + register, + reason: reason.to_string(), + }, + }); + } + fn cfa_steps(&self, rule: &CfaRule) -> Result> { match rule { CfaRule::RegisterAndOffset { register, offset } => { diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index ff513d0d..9f2698b6 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -3,8 +3,8 @@ //! High-performance DWARF analysis library with on-demand loading architecture //! inspired by GDB's cooked index system. -// Core modules -pub mod core; +// Core implementation modules +pub(crate) mod core; // Internal implementation modules pub(crate) mod binary; @@ -13,7 +13,7 @@ pub(crate) mod index; pub(crate) mod loader; pub(crate) mod objfile; pub(crate) mod parser; -pub(crate) mod semantics; +pub mod semantics; // Main entry point pub mod analyzer; @@ -24,22 +24,22 @@ pub use analyzer::{ ModuleLoadingStats, ModuleStats, SharedLibraryInfo, SimpleFileInfo, }; -// Re-export essential core types +// Re-export essential core and semantic support types. pub use core::{ - // Evaluation types for LLVM codegen - CfaResult, - ComputeStep, - DirectValueResult, - DwarfError, - EvaluationResult, - FunctionInfo, - LocationResult, - MemoryAccessSize, - ModuleAddress, - PieceResult, - Result, - SourceLocation, - VariableInfo, + AddressExpr, AmbiguityReason, Availability, CallerFrameRecovery, CfaResult, ComputeStep, CuId, + DieRef, DwarfError, EntryValueCase, FunctionId, FunctionInfo, GlobalVariableInfo, HelperMode, + InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, Provenance, Result, + RuntimeCapabilities, RuntimeRequirement, ScopeId, SectionType, SourceLocation, TargetArch, + TypeId, UnsupportedReason, VariableId, VariableInfo, VariableLocation, VerifierRisk, +}; + +// Re-export semantic contract types. +pub use semantics::{ + AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, CompactUnwindTable, + InlineFrame, PcContext, PcLineInfo, PcRange, RegisterRecoveryPlan, UnwindDiagnostic, + UnwindDiagnosticKind, VariableAccessPath, VariableAccessSegment, VariableLoweringKind, + VariableLoweringPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, VisibleVariable, + VisibleVariablesResult, }; // Re-export type definitions from protocol (avoiding circular dependencies) @@ -47,8 +47,5 @@ pub use ghostscope_protocol::{ EnumVariant, StructMember, TypeCache, TypeInfo, TypeKind, TypeQualifier, }; -// Re-export data types needed by external users -pub use parser::VariableWithEvaluation; - // Re-export gimli types that external users need pub use gimli::{constants, DwAte}; diff --git a/ghostscope-dwarf/src/objfile/access_planner.rs b/ghostscope-dwarf/src/objfile/access_planner.rs deleted file mode 100644 index b142eef6..00000000 --- a/ghostscope-dwarf/src/objfile/access_planner.rs +++ /dev/null @@ -1,767 +0,0 @@ -//! DWARF access planner: plan chain access using DIE-level traversal without -//! requiring full TypeInfo expansion. - -pub(crate) use crate::semantics::TypeLoc; -use crate::semantics::{resolve_type_ref_with_origins, strip_typedef_qualified}; -use crate::{ - binary::DwarfReader, - core::{attr_u64, EvaluationResult, Result}, - dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, -}; -use gimli::Reader; - -/// Utilities for DIE-level chain access planning -pub struct AccessPlanner<'dwarf> { - dwarf: &'dwarf gimli::Dwarf, - type_index: Option>, - strict_index: bool, -} - -/// Parent struct/class context for the final matched member. -#[derive(Debug, Clone)] -pub struct MemberParentCtx { - pub parent_cu_off: gimli::DebugInfoOffset, - pub parent_die_off: gimli::UnitOffset, - pub member_name: String, -} - -impl<'dwarf> AccessPlanner<'dwarf> { - pub fn new(dwarf: &'dwarf gimli::Dwarf) -> Self { - Self { - dwarf, - type_index: None, - strict_index: false, - } - } - - pub fn new_with_index( - dwarf: &'dwarf gimli::Dwarf, - type_index: std::sync::Arc, - strict_index: bool, - ) -> Self { - Self { - dwarf, - type_index: Some(type_index), - strict_index, - } - } - - /// Public wrapper for resolving DW_AT_type via origins/specification chain - pub fn resolve_type_ref_with_origins_public( - &self, - entry: &gimli::DebuggingInformationEntry, - unit: &gimli::Unit, - ) -> crate::core::Result> { - resolve_type_ref_with_origins(self.dwarf, entry, unit) - } - - /// If DIE is an explicit declaration, try to find a full definition across units. - /// - /// This must stay narrower than "childless aggregate". `die.has_children()` - /// only answers whether this DIE has inline member DIEs; it does not say - /// whether the DIE is a forward declaration. Empty definitions are valid - /// aggregates and legitimately have no children, so rebinding every - /// childless `struct Foo` by name can silently hop to an unrelated `Foo` - /// from another CU or namespace. - /// - /// The child flag still matters for member scanning after we have the final - /// DIE, but it must not be used as the trigger for declaration completion. - fn maybe_complete_aggregate( - &self, - unit: &gimli::Unit, - die: &gimli::DebuggingInformationEntry, - ) -> crate::core::Result<(Option, gimli::UnitOffset)> { - let mut is_decl = false; - if let Some(attr) = die.attr(gimli::DW_AT_declaration) { - is_decl = matches!(attr.value(), gimli::AttributeValue::Flag(true)); - } - - if !is_decl { - return Ok((None, die.offset())); - } - - let name_opt = if let Some(attr) = die.attr(gimli::DW_AT_name) { - self.dwarf - .attr_string(unit, attr.value()) - .ok() - .and_then(|s| s.to_string_lossy().ok().map(|cow| cow.into_owned())) - } else { - None - }; - - if name_opt.is_some() { - let name = name_opt.unwrap(); - let tag = die.tag(); - if let Some(tix) = &self.type_index { - if let Some(loc) = tix.find_aggregate_definition(&name, tag) { - return Ok((Some(loc.cu_offset), loc.die_offset)); - } - if self.strict_index { - return Err(anyhow::anyhow!( - "StrictIndex: missing definition for {} {:?}", - name, - tag - )); - } - } - // Non-strict: do not scan here anymore to reduce load; return original - return Ok((None, die.offset())); - } - Ok((None, die.offset())) - } - - /// Start planning from a known variable (skip variable search) - pub fn plan_chain_from_known( - &self, - mut current_cu_off: gimli::DebugInfoOffset, - type_die_off: gimli::UnitOffset, - mut current_eval: EvaluationResult, - chain: &[String], - ) -> Result<(EvaluationResult, TypeLoc, Option)> { - let mut current_type = TypeLoc { - cu_off: current_cu_off, - die_off: type_die_off, - }; - let mut idx = 0usize; - let mut last_parent_ctx: Option = None; - while idx < chain.len() { - let field = &chain[idx]; - current_type = strip_typedef_qualified(self.dwarf, current_type)?; - current_cu_off = current_type.cu_off; - - // Reacquire current unit on each step - let header_now = self.dwarf.unit_header(current_type.cu_off)?; - let unit_now = self.dwarf.unit(header_now)?; - let type_die = unit_now.entry(current_type.die_off)?; - - match type_die.tag() { - gimli::DW_TAG_pointer_type => { - // Dereference then continue without consuming field - current_eval = Self::compute_pointer_deref(current_eval); - if let Some(next) = - resolve_type_ref_with_origins(self.dwarf, &type_die, &unit_now)? - { - current_type = next; - } else { - return Ok((current_eval, current_type, last_parent_ctx)); - } - continue; - } - gimli::DW_TAG_structure_type | gimli::DW_TAG_class_type => { - // Ensure definition DIE; possibly switch unit - let (def_cu_opt, def_off) = - self.maybe_complete_aggregate(&unit_now, &type_die)?; - if let Some(cu_off) = def_cu_opt { - current_cu_off = cu_off; - } - // Reacquire possibly switched unit and read the definition DIE - let header_now2 = self.dwarf.unit_header(current_cu_off)?; - let unit_now2 = self.dwarf.unit(header_now2)?; - let def_die = unit_now2.entry(def_off)?; - // Only direct DW_TAG_member children belong to this aggregate. - // Nested class/struct DIEs may appear under a C++ aggregate, but - // their members are not direct members of the parent type. - let mut tree = unit_now2.entries_tree(Some(def_die.offset()))?; - let root = tree.root()?; - let mut children = root.children(); - let mut found_member = false; - while let Some(child) = children.next()? { - let e = child.entry(); - if e.tag() == gimli::DW_TAG_member { - if let Some(attr) = e.attr(gimli::DW_AT_name) { - if let Ok(s) = self.dwarf.attr_string(&unit_now2, attr.value()) { - if let Ok(s_str) = s.to_string_lossy() { - if s_str == field.as_str() { - // offset - let mut off: Option = None; - if let Some(a) = - e.attr(gimli::DW_AT_data_member_location) - { - match a.value() { - gimli::AttributeValue::Exprloc(expr) => { - off = expr_errors::hard( - DwarfExprMode::ConstOffset, - crate::dwarf_expr::const_eval::eval_const_offset( - &expr, - unit_now2.encoding(), - ), - )?; - } - value => off = attr_u64(value), - } - } - if off.is_none() { - if let Some(a) = - e.attr(gimli::DW_AT_data_bit_offset) - { - if let Some(v) = attr_u64(a.value()) { - off = Some(v / 8); - } - } - } - // Apply offset immediately if available - if let Some(off) = off { - use crate::core::{ - ComputeStep, EvaluationResult, LocationResult, - }; - current_eval = match current_eval { - EvaluationResult::MemoryLocation( - LocationResult::RegisterAddress { - register, - offset, - size, - }, - ) => { - let new_off = offset - .unwrap_or(0) - .saturating_add(off as i64); - EvaluationResult::MemoryLocation( - LocationResult::RegisterAddress { - register, - offset: Some(new_off), - size, - }, - ) - } - EvaluationResult::MemoryLocation( - LocationResult::Address(addr), - ) => EvaluationResult::MemoryLocation( - LocationResult::Address( - addr.saturating_add(off), - ), - ), - EvaluationResult::MemoryLocation( - LocationResult::ComputedLocation { - mut steps, - }, - ) => { - steps.push(ComputeStep::PushConstant( - off as i64, - )); - steps.push(ComputeStep::Add); - EvaluationResult::MemoryLocation( - LocationResult::ComputedLocation { - steps, - }, - ) - } - other => other, - }; - } - // type - let next_type = resolve_type_ref_with_origins( - self.dwarf, e, &unit_now2, - )?; - let parent_cu_off = current_cu_off; - current_type = next_type.unwrap_or(TypeLoc { - cu_off: current_cu_off, - die_off: current_type.die_off, - }); - last_parent_ctx = Some(MemberParentCtx { - parent_cu_off, - parent_die_off: def_off, - member_name: field.clone(), - }); - found_member = true; - break; - } - } - } - } - } - } - if found_member { - // consumed one field - idx += 1; - } else { - // Field not found on this aggregate — report an error instead of - // silently returning the base aggregate. - // Try to get a friendly type name for diagnostics - let type_name = if let Some(attr) = def_die.attr(gimli::DW_AT_name) { - if let Ok(s) = self.dwarf.attr_string(&unit_now2, attr.value()) { - s.to_string_lossy().ok().unwrap_or_default().into_owned() - } else { - String::new() - } - } else { - String::new() - }; - let msg = if type_name.is_empty() { - format!("member '{field}' not found") - } else { - format!("member '{field}' not found on type '{type_name}'") - }; - return Err(anyhow::anyhow!(msg)); - } - } - _ => { - // Can't descend further - return Ok((current_eval, current_type, last_parent_ctx)); - } - } - } - - Ok((current_eval, current_type, last_parent_ctx)) - } - - fn compute_pointer_deref(base: EvaluationResult) -> EvaluationResult { - use crate::core::{ComputeStep, DirectValueResult, LocationResult}; - match base { - EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { - register, - offset, - .. - }) => { - let mut steps = Vec::new(); - steps.push(ComputeStep::LoadRegister(register)); - if let Some(off) = offset { - steps.push(ComputeStep::PushConstant(off)); - steps.push(ComputeStep::Add); - } - steps.push(ComputeStep::Dereference { - size: crate::core::MemoryAccessSize::U64, - }); - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) - } - EvaluationResult::MemoryLocation(LocationResult::Address(addr)) => { - let steps = vec![ - ComputeStep::PushConstant(addr as i64), - ComputeStep::Dereference { - size: crate::core::MemoryAccessSize::U64, - }, - ]; - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) - } - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { mut steps }) => { - steps.push(ComputeStep::Dereference { - size: crate::core::MemoryAccessSize::U64, - }); - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) - } - EvaluationResult::DirectValue(DirectValueResult::RegisterValue(register)) => { - EvaluationResult::MemoryLocation(LocationResult::RegisterAddress { - register, - offset: None, - size: None, - }) - } - EvaluationResult::DirectValue(DirectValueResult::Constant(value)) => { - EvaluationResult::MemoryLocation(LocationResult::Address(value as u64)) - } - EvaluationResult::DirectValue(DirectValueResult::AbsoluteAddress(value)) => { - EvaluationResult::MemoryLocation(LocationResult::Address(value)) - } - EvaluationResult::DirectValue(DirectValueResult::ImplicitValue(bytes)) => { - let mut value = 0u64; - for (idx, byte) in bytes.iter().take(8).enumerate() { - value |= (*byte as u64) << (idx * 8); - } - EvaluationResult::MemoryLocation(LocationResult::Address(value)) - } - EvaluationResult::DirectValue(DirectValueResult::ComputedValue { - mut steps, .. - }) => { - steps.push(ComputeStep::Dereference { - size: crate::core::MemoryAccessSize::U64, - }); - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) - } - other => other, - } - } - - // compute_add_offset removed (unused) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::binary::dwarf_reader_from_arc; - use crate::core::{FunctionDieKind, IndexEntry, IndexFlags}; - use crate::index::{LightweightIndex, TypeNameIndex}; - use gimli::constants; - use gimli::write::{ - AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec, LineProgram, - Sections, Unit, - }; - use gimli::{DebugInfoOffset, Format, LittleEndian}; - use std::collections::HashMap; - use std::sync::Arc; - - type PlannerRegressionFixture = ( - gimli::Dwarf, - gimli::Unit, - gimli::UnitOffset, - DebugInfoOffset, - gimli::UnitOffset, - Arc, - ); - - fn build_declaration_completion_fixture() -> PlannerRegressionFixture { - let encoding = gimli::Encoding { - format: Format::Dwarf32, - version: 4, - address_size: 8, - }; - - let mut dwarf = WriteDwarf::new(); - let decl_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); - let def_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); - - { - let unit = dwarf.units.get_mut(decl_unit_id); - let root = unit.root(); - - let struct_id = unit.add(root, constants::DW_TAG_structure_type); - let struct_entry = unit.get_mut(struct_id); - struct_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"Foo".to_vec()), - ); - struct_entry.set( - constants::DW_AT_declaration, - WriteAttributeValue::Flag(true), - ); - - let sibling_id = unit.add(root, constants::DW_TAG_subprogram); - let sibling = unit.get_mut(sibling_id); - sibling.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"later_sibling".to_vec()), - ); - } - - { - let unit = dwarf.units.get_mut(def_unit_id); - let root = unit.root(); - - let int_id = unit.add(root, constants::DW_TAG_base_type); - let int_entry = unit.get_mut(int_id); - int_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"int".to_vec()), - ); - int_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); - int_entry.set( - constants::DW_AT_encoding, - WriteAttributeValue::Encoding(constants::DW_ATE_signed), - ); - - let struct_id = unit.add(root, constants::DW_TAG_structure_type); - let struct_entry = unit.get_mut(struct_id); - struct_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"Foo".to_vec()), - ); - struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); - - let member_id = unit.add(struct_id, constants::DW_TAG_member); - let member = unit.get_mut(member_id); - member.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"x".to_vec()), - ); - member.set(constants::DW_AT_type, WriteAttributeValue::UnitRef(int_id)); - member.set( - constants::DW_AT_data_member_location, - WriteAttributeValue::Data1(0), - ); - } - - let mut sections = Sections::new(EndianVec::new(LittleEndian)); - dwarf.write(&mut sections).unwrap(); - - let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(|id| { - Ok::<_, gimli::Error>( - sections - .get(id) - .map(|section| section.slice().to_vec()) - .unwrap_or_default(), - ) - }) - .unwrap(); - let read_dwarf = dwarf_sections - .borrow(|section| dwarf_reader_from_arc(Arc::<[u8]>::from(section.as_slice()))); - - let mut units = read_dwarf.units(); - let decl_header = units.next().unwrap().unwrap(); - let def_header = units.next().unwrap().unwrap(); - let def_cu_off = def_header.debug_info_offset().unwrap(); - - let decl_unit = read_dwarf.unit(decl_header).unwrap(); - let def_unit = read_dwarf.unit(def_header).unwrap(); - let decl_struct_off = find_struct_offset(&read_dwarf, &decl_unit, "Foo", true, false); - let def_struct_off = find_struct_offset(&read_dwarf, &def_unit, "Foo", false, true); - - let mut types = HashMap::new(); - types.insert( - "Foo".to_string(), - vec![IndexEntry { - name: Arc::from("Foo"), - die_offset: def_struct_off, - unit_offset: def_cu_off, - tag: constants::DW_TAG_structure_type, - flags: IndexFlags::default(), - language: None, - representative_addr: None, - entry_pc: None, - function_kind: FunctionDieKind::NotFunction, - }], - ); - let type_index = Arc::new(TypeNameIndex::build_from_lightweight( - &LightweightIndex::from_builder_data(HashMap::new(), HashMap::new(), types), - )); - - ( - read_dwarf, - decl_unit, - decl_struct_off, - def_cu_off, - def_struct_off, - type_index, - ) - } - - fn build_empty_definition_fixture() -> PlannerRegressionFixture { - let encoding = gimli::Encoding { - format: Format::Dwarf32, - version: 4, - address_size: 8, - }; - - let mut dwarf = WriteDwarf::new(); - let empty_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); - let full_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); - - { - let unit = dwarf.units.get_mut(empty_unit_id); - let root = unit.root(); - - let struct_id = unit.add(root, constants::DW_TAG_structure_type); - let struct_entry = unit.get_mut(struct_id); - struct_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"Foo".to_vec()), - ); - // This is a real empty definition, not a forward declaration. - struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(1)); - } - - { - let unit = dwarf.units.get_mut(full_unit_id); - let root = unit.root(); - - let int_id = unit.add(root, constants::DW_TAG_base_type); - let int_entry = unit.get_mut(int_id); - int_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"int".to_vec()), - ); - int_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); - int_entry.set( - constants::DW_AT_encoding, - WriteAttributeValue::Encoding(constants::DW_ATE_signed), - ); - - let struct_id = unit.add(root, constants::DW_TAG_structure_type); - let struct_entry = unit.get_mut(struct_id); - struct_entry.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"Foo".to_vec()), - ); - struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); - - let member_id = unit.add(struct_id, constants::DW_TAG_member); - let member = unit.get_mut(member_id); - member.set( - constants::DW_AT_name, - WriteAttributeValue::String(b"x".to_vec()), - ); - member.set(constants::DW_AT_type, WriteAttributeValue::UnitRef(int_id)); - member.set( - constants::DW_AT_data_member_location, - WriteAttributeValue::Data1(0), - ); - } - - let mut sections = Sections::new(EndianVec::new(LittleEndian)); - dwarf.write(&mut sections).unwrap(); - - let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(|id| { - Ok::<_, gimli::Error>( - sections - .get(id) - .map(|section| section.slice().to_vec()) - .unwrap_or_default(), - ) - }) - .unwrap(); - let read_dwarf = dwarf_sections - .borrow(|section| dwarf_reader_from_arc(Arc::<[u8]>::from(section.as_slice()))); - - let mut units = read_dwarf.units(); - let empty_header = units.next().unwrap().unwrap(); - let full_header = units.next().unwrap().unwrap(); - let full_cu_off = full_header.debug_info_offset().unwrap(); - - let empty_unit = read_dwarf.unit(empty_header).unwrap(); - let full_unit = read_dwarf.unit(full_header).unwrap(); - let empty_struct_off = find_struct_offset(&read_dwarf, &empty_unit, "Foo", false, false); - let full_struct_off = find_struct_offset(&read_dwarf, &full_unit, "Foo", false, true); - - let mut types = HashMap::new(); - types.insert( - "Foo".to_string(), - vec![IndexEntry { - name: Arc::from("Foo"), - die_offset: full_struct_off, - unit_offset: full_cu_off, - tag: constants::DW_TAG_structure_type, - flags: IndexFlags::default(), - language: None, - representative_addr: None, - entry_pc: None, - function_kind: FunctionDieKind::NotFunction, - }], - ); - let type_index = Arc::new(TypeNameIndex::build_from_lightweight( - &LightweightIndex::from_builder_data(HashMap::new(), HashMap::new(), types), - )); - - ( - read_dwarf, - empty_unit, - empty_struct_off, - full_cu_off, - full_struct_off, - type_index, - ) - } - - fn find_struct_offset( - dwarf: &gimli::Dwarf, - unit: &gimli::Unit, - expected_name: &str, - expected_is_declaration: bool, - expected_has_children: bool, - ) -> gimli::UnitOffset { - let mut entries = unit.entries(); - while let Some(entry) = entries.next_dfs().unwrap() { - if entry.tag() != constants::DW_TAG_structure_type { - continue; - } - let Some(attr) = entry.attr(constants::DW_AT_name) else { - continue; - }; - let Ok(name) = dwarf.attr_string(unit, attr.value()) else { - continue; - }; - let Ok(name) = name.to_string_lossy() else { - continue; - }; - let is_declaration = matches!( - entry.attr(constants::DW_AT_declaration), - Some(attr) if matches!(attr.value(), gimli::AttributeValue::Flag(true)) - ); - if name == expected_name - && is_declaration == expected_is_declaration - && entry.has_children() == expected_has_children - { - return entry.offset(); - } - } - panic!( - "missing struct {expected_name} with declaration={expected_is_declaration} \ - and has_children={expected_has_children}" - ); - } - - fn legacy_has_children_via_next_dfs( - unit: &gimli::Unit, - die: &gimli::DebuggingInformationEntry, - ) -> bool { - let mut entries = unit.entries_at_offset(die.offset()).unwrap(); - let _ = entries.next_entry().unwrap(); - entries.next_dfs().unwrap().is_some() - } - - #[test] - fn maybe_complete_aggregate_uses_declaration_flag_despite_later_siblings() { - let (dwarf, decl_unit, decl_struct_off, def_cu_off, def_struct_off, type_index) = - build_declaration_completion_fixture(); - let decl_struct_die = decl_unit.entry(decl_struct_off).unwrap(); - let mut legacy_cursor = decl_unit.entries_at_offset(decl_struct_off).unwrap(); - assert!(legacy_cursor.next_entry().unwrap()); - let next_after_decl = legacy_cursor.next_dfs().unwrap().unwrap(); - - assert!(!decl_struct_die.has_children()); - assert_eq!(next_after_decl.depth(), 0); - assert_eq!(next_after_decl.tag(), constants::DW_TAG_subprogram); - assert!(legacy_has_children_via_next_dfs( - &decl_unit, - &decl_struct_die - )); - - let planner = AccessPlanner::new_with_index(&dwarf, type_index, false); - let (resolved_cu, resolved_die) = planner - .maybe_complete_aggregate(&decl_unit, &decl_struct_die) - .unwrap(); - - assert_eq!(resolved_cu, Some(def_cu_off)); - assert_eq!(resolved_die, def_struct_off); - } - - #[test] - fn maybe_complete_aggregate_does_not_rebind_empty_definitions() { - let (dwarf, empty_unit, empty_struct_off, full_cu_off, full_struct_off, type_index) = - build_empty_definition_fixture(); - let empty_struct_die = empty_unit.entry(empty_struct_off).unwrap(); - - assert!(!empty_struct_die.has_children()); - assert!(empty_struct_die - .attr(constants::DW_AT_declaration) - .is_none()); - - let planner = AccessPlanner::new_with_index(&dwarf, type_index, false); - let (resolved_cu, resolved_die) = planner - .maybe_complete_aggregate(&empty_unit, &empty_struct_die) - .unwrap(); - - assert_eq!(resolved_cu, None); - assert_eq!(resolved_die, empty_struct_off); - assert_ne!(resolved_die, full_struct_off); - assert_ne!(resolved_cu, Some(full_cu_off)); - } - - #[test] - fn pointer_deref_handles_entry_value_materialized_computed_values() { - let eval = EvaluationResult::DirectValue(crate::core::DirectValueResult::ComputedValue { - steps: vec![crate::core::ComputeStep::PushConstant(0x2000)], - result_size: crate::core::MemoryAccessSize::U64, - }); - assert_eq!( - AccessPlanner::compute_pointer_deref(eval), - EvaluationResult::MemoryLocation(crate::core::LocationResult::ComputedLocation { - steps: vec![ - crate::core::ComputeStep::PushConstant(0x2000), - crate::core::ComputeStep::Dereference { - size: crate::core::MemoryAccessSize::U64, - }, - ], - }) - ); - } - - #[test] - fn pointer_deref_handles_direct_register_values() { - let eval = EvaluationResult::DirectValue(crate::core::DirectValueResult::RegisterValue(12)); - assert_eq!( - AccessPlanner::compute_pointer_deref(eval), - EvaluationResult::MemoryLocation(crate::core::LocationResult::RegisterAddress { - register: 12, - offset: None, - size: None, - }) - ); - } -} diff --git a/ghostscope-dwarf/src/objfile/globals.rs b/ghostscope-dwarf/src/objfile/globals.rs index 9351db51..3b766480 100644 --- a/ghostscope-dwarf/src/objfile/globals.rs +++ b/ghostscope-dwarf/src/objfile/globals.rs @@ -1,79 +1,9 @@ -use super::{variables::ChainSpec, LoadedObjfile}; -use crate::core::{GlobalVariableInfo, Result, SectionType}; +use super::LoadedObjfile; +use crate::core::{GlobalVariableInfo, SectionType}; use object::{Object, ObjectSection}; use std::collections::HashSet; impl LoadedObjfile { - pub(crate) fn compute_global_member_static_offset( - &self, - cu_off: gimli::DebugInfoOffset, - var_die: gimli::UnitOffset, - link_address: u64, - fields: &[String], - ) -> Result> { - let planned = self.plan_chain_access_from_var( - 0, - cu_off, - var_die, - var_die, - ChainSpec { - base: "__global__", - fields, - }, - super::variables::VariableEvalContext { - get_cfa: None, - function_context: None, - }, - )?; - let Some(var) = planned else { - return Ok(None); - }; - - use crate::core::{ComputeStep, EvaluationResult, LocationResult}; - let abs_addr_opt = match &var.evaluation_result { - EvaluationResult::MemoryLocation(LocationResult::Address(a)) => Some(*a), - EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps }) => { - let mut st: Vec = Vec::new(); - let mut foldable = true; - for s in steps { - match s { - ComputeStep::PushConstant(v) => st.push(*v), - ComputeStep::Add => { - if st.len() >= 2 { - let b = st.pop().unwrap(); - let a = st.pop().unwrap(); - st.push(a.saturating_add(b)); - } else { - foldable = false; - break; - } - } - _ => { - foldable = false; - break; - } - } - } - if foldable && st.len() == 1 { - Some(st[0] as u64) - } else { - None - } - } - _ => None, - }; - - if let Some(abs) = abs_addr_opt { - let off = abs.saturating_sub(link_address); - let final_ty = var - .dwarf_type - .unwrap_or(crate::TypeInfo::UnknownType { name: "".into() }); - return Ok(Some((off, final_ty))); - } - - Ok(None) - } - pub(crate) fn find_global_variables_by_name_any(&self, name: &str) -> Vec { let base = self.find_global_variables_by_name(name); if !base.is_empty() { diff --git a/ghostscope-dwarf/src/objfile/loaded.rs b/ghostscope-dwarf/src/objfile/loaded.rs index 47d89dd2..aa3e70ca 100644 --- a/ghostscope-dwarf/src/objfile/loaded.rs +++ b/ghostscope-dwarf/src/objfile/loaded.rs @@ -103,6 +103,26 @@ impl LoadedObjfile { } } + pub(crate) fn compact_unwind_table( + &self, + module: crate::ModuleId, + ) -> Result> { + match &self.cfi_index { + Some(cfi) => Ok(Some(cfi.compact_unwind_table(module)?)), + None => Ok(None), + } + } + + pub(crate) fn compact_unwind_row( + &self, + module: crate::ModuleId, + pc: u64, + ) -> Result> { + Ok(self + .compact_unwind_table(module)? + .and_then(|table| table.row_for_pc(pc).cloned())) + } + pub(crate) fn vaddr_to_file_offset(&self, vaddr: u64) -> Option { if self._binary_mapped_file.data.is_empty() { return None; diff --git a/ghostscope-dwarf/src/objfile/mod.rs b/ghostscope-dwarf/src/objfile/mod.rs index cf020e0c..e083a4d8 100644 --- a/ghostscope-dwarf/src/objfile/mod.rs +++ b/ghostscope-dwarf/src/objfile/mod.rs @@ -1,6 +1,5 @@ //! Loaded object file: complete DWARF data for a single binary -pub(crate) mod access_planner; pub(crate) mod function_lookup; pub(crate) mod globals; pub(crate) mod loaded; diff --git a/ghostscope-dwarf/src/objfile/variables.rs b/ghostscope-dwarf/src/objfile/variables.rs index ed2fb958..0eeb1c4e 100644 --- a/ghostscope-dwarf/src/objfile/variables.rs +++ b/ghostscope-dwarf/src/objfile/variables.rs @@ -1,21 +1,84 @@ -use super::{access_planner::AccessPlanner, LoadedObjfile}; +use super::LoadedObjfile; use crate::{ - core::Result, - index::{BlockIndexBuilder, FunctionBlocks}, - parser::{DetailedParser, ExpressionEvaluator}, - semantics::{resolve_attr_with_unit_origins, resolve_name_with_origins}, + core::{ + attr_u64, Availability, FunctionId, InlineContextId, Result, ScopeId, UnsupportedReason, + }, + index::{BlockIndexBuilder, FunctionBlocks, VarRef}, + parser::ExpressionEvaluator, + semantics::{ + resolve_attr_with_unit_origins, resolve_name_with_origins, resolve_origin_entry, + resolve_type_ref_with_origins, InlineFrame, PcLineInfo, VariableQueryDiagnostic, + }, }; use gimli::Reader; -use std::{sync::Arc, time::Instant}; -pub(super) struct ChainSpec<'a> { - pub base: &'a str, - pub fields: &'a [String], +type PcScopes = ( + Option, + Option, + Vec, + Vec, +); + +fn cu_id(cu_offset: gimli::DebugInfoOffset) -> crate::CuId { + crate::CuId(cu_offset.0 as u32) +} + +fn die_ref( + module: crate::ModuleId, + cu_offset: gimli::DebugInfoOffset, + die_offset: gimli::UnitOffset, +) -> crate::DieRef { + crate::DieRef { + module, + cu: cu_id(cu_offset), + offset: die_offset.0 as u64, + } } -pub(super) struct VariableEvalContext<'a> { - pub get_cfa: Option<&'a dyn Fn(u64) -> Result>>, - pub function_context: Option<&'a FunctionBlocks>, +fn type_id( + module: crate::ModuleId, + cu_offset: gimli::DebugInfoOffset, + die_offset: gimli::UnitOffset, +) -> crate::TypeId { + let cu = cu_id(cu_offset); + crate::TypeId { + module, + cu, + die: crate::DieRef { + module, + cu, + offset: die_offset.0 as u64, + }, + } +} + +fn complete_aggregate_declaration_entry( + dwarf: &gimli::Dwarf, + type_name_index: &crate::index::TypeNameIndex, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, +) -> Option<(gimli::DebugInfoOffset, gimli::UnitOffset)> { + match entry.tag() { + gimli::constants::DW_TAG_structure_type + | gimli::constants::DW_TAG_class_type + | gimli::constants::DW_TAG_union_type + | gimli::constants::DW_TAG_enumeration_type => {} + _ => return None, + } + + let is_declaration = entry + .attr(gimli::constants::DW_AT_declaration) + .is_some_and(|attr| matches!(attr.value(), gimli::AttributeValue::Flag(true))); + if !is_declaration { + return None; + } + + let name = entry + .attr(gimli::constants::DW_AT_name) + .and_then(|attr| dwarf.attr_string(unit, attr.value()).ok()) + .and_then(|name| name.to_string_lossy().ok().map(|name| name.into_owned()))?; + let definition = type_name_index.find_aggregate_definition(&name, entry.tag())?; + Some((definition.cu_offset, definition.die_offset)) } impl LoadedObjfile { @@ -65,6 +128,61 @@ impl LoadedObjfile { true } + fn cu_name_from_unit(&self, unit: &gimli::Unit) -> Option { + let mut entries = unit.entries(); + let entry = entries.next_dfs().ok()??; + let name = entry.attr_value(gimli::constants::DW_AT_name)?; + self.dwarf() + .attr_string(unit, name) + .ok()? + .to_string_lossy() + .ok() + .map(|name| name.into_owned()) + } + + fn attr_file_index(value: gimli::AttributeValue) -> Option { + match value { + gimli::AttributeValue::FileIndex(index) => Some(index), + other => attr_u64(other), + } + } + + fn inline_call_site_info( + &self, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, + address: u64, + ) -> Option { + let line_number = entry + .attr_value(gimli::constants::DW_AT_call_line) + .and_then(attr_u64) + .and_then(|line| u32::try_from(line).ok())?; + let column = entry + .attr_value(gimli::constants::DW_AT_call_column) + .and_then(attr_u64) + .and_then(|column| u32::try_from(column).ok()) + .filter(|column| *column > 0); + let cu_name = self.cu_name_from_unit(unit); + let file_path = entry + .attr_value(gimli::constants::DW_AT_call_file) + .and_then(Self::attr_file_index) + .and_then(|file_index| { + cu_name.as_deref().and_then(|cu_name| { + self.scoped_file_manager + .lookup_by_scoped_index(cu_name, file_index) + }) + }) + .or_else(|| cu_name.clone()) + .unwrap_or_default(); + + Some(PcLineInfo { + file_path, + line_number, + column, + address, + }) + } + pub(crate) fn is_inline_at(&self, address: u64) -> Option { self.ensure_block_index_for_address(address); @@ -92,135 +210,75 @@ impl LoadedObjfile { Some(false) } - pub(super) fn plan_chain_access_from_var( + + pub(crate) fn resolve_pc_scopes( &self, + module: crate::ModuleId, address: u64, - cu_offset: gimli::DebugInfoOffset, - subprogram_die: gimli::UnitOffset, - var_die: gimli::UnitOffset, - chain: ChainSpec<'_>, - eval_context: VariableEvalContext<'_>, - ) -> Result> { - tracing::info!( - "DWARF:plan_from_var addr=0x{:x} cu_off={:?} subprogram={:?} var_die={:?} base='{}' chain_len={}", - address, - cu_offset, - subprogram_die, - var_die, - chain.base, - chain.fields.len() - ); - let header = self.dwarf.unit_header(cu_offset)?; - let unit = self.dwarf.unit(header)?; - let var_entry = unit.entry(var_die)?; - - let base_var = self.detailed_parser.parse_variable_entry_with_mode( - &var_entry, - &unit, - &self.dwarf, - address, - eval_context.get_cfa, - eval_context.function_context, - self.cfi_index.as_ref(), - 0, - )?; - tracing::debug!("DWARF:plan_from_var done"); - let Some(base_var) = base_var else { - return Ok(None); - }; - let current_eval = base_var.evaluation_result.clone(); - - let planner = - AccessPlanner::new_with_index(self.dwarf(), Arc::clone(&self.type_name_index), true); - let mut final_eval = current_eval.clone(); - let mut final_type_loc = None; - let mut parent_ctx = None; - - if !chain.fields.is_empty() { - let t1 = std::time::Instant::now(); - let type_loc = planner - .resolve_type_ref_with_origins_public(&var_entry, &unit)? - .ok_or_else(|| anyhow::anyhow!("variable has no DW_AT_type"))?; - tracing::info!( - "DWARF:plan_from_var resolve_type_ref_ms={}", - t1.elapsed().as_millis() - ); - - let t2 = std::time::Instant::now(); - let (fe, ftl, pctx) = planner.plan_chain_from_known( - type_loc.cu_off, - type_loc.die_off, - current_eval, - chain.fields, - )?; - final_eval = fe; - final_type_loc = Some(ftl); - parent_ctx = pctx; - tracing::info!( - "DWARF:plan_from_var planner_ms={}", - t2.elapsed().as_millis() - ); - } + ) -> Result { + self.ensure_block_index_for_address(address); - let mut final_type = None; - if let Some(ftl) = final_type_loc { - let t3 = std::time::Instant::now(); - let h = self.dwarf.unit_header(ftl.cu_off)?; - let u = self.dwarf.unit(h)?; - let mut shallow_final = - DetailedParser::resolve_type_shallow_at_offset(&self.dwarf, &u, ftl.die_off); - tracing::info!( - "DWARF:plan_from_var final_type_ms={}", - t3.elapsed().as_millis() - ); + let func = match self + .block_index + .read() + .expect("block index lock poisoned") + .find_function_by_pc(address) + .cloned() + { + Some(func) => func, + None => return Ok((None, None, Vec::new(), Vec::new())), + }; - if let Some(ctx) = parent_ctx { - let h = self.dwarf.unit_header(ctx.parent_cu_off)?; - let u = self.dwarf.unit(h)?; - if let Some( - crate::TypeInfo::StructType { members, .. } - | crate::TypeInfo::UnionType { members, .. }, - ) = DetailedParser::resolve_type_shallow_at_offset( - &self.dwarf, - &u, - ctx.parent_die_off, - ) { - if let Some(m) = members.iter().find(|m| m.name == ctx.member_name) { - tracing::info!( - "DWARF:parent_enrich member='{}' uses BitfieldType={}", - ctx.member_name, - matches!(m.member_type, crate::TypeInfo::BitfieldType { .. }) - ); - shallow_final = Some(m.member_type.clone()); - } + let header = self.dwarf().unit_header(func.cu_offset)?; + let unit = self.dwarf().unit(header)?; + let cu = cu_id(func.cu_offset); + let function_die = die_ref(module, func.cu_offset, func.die_offset); + let function = Some(FunctionId { + declaration: function_die, + }); + let mut lexical_scopes = Vec::new(); + let mut inline_chain = Vec::new(); + + for node_index in func.block_path_for_pc(address) { + let Some(die_offset) = func.nodes[node_index].die_offset else { + continue; + }; + let entry = unit.entry(die_offset)?; + let die = die_ref(module, func.cu_offset, die_offset); + match entry.tag() { + gimli::constants::DW_TAG_lexical_block => { + lexical_scopes.push(ScopeId { die }); } + gimli::constants::DW_TAG_inlined_subroutine => { + let abstract_origin = entry + .attr_value(gimli::constants::DW_AT_abstract_origin) + .and_then(|value| { + resolve_origin_entry(self.dwarf(), &unit, value) + .ok() + .flatten() + .and_then(|(_, origin_unit, origin_entry)| { + origin_unit.header.debug_info_offset().map(|origin_cu| { + die_ref(module, origin_cu, origin_entry.offset()) + }) + }) + }); + let function_name = resolve_name_with_origins(self.dwarf(), &unit, &entry) + .ok() + .flatten(); + let call_site = self.inline_call_site_info(&unit, &entry, address); + inline_chain.push(InlineFrame { + context: Some(InlineContextId { die }), + call_site, + abstract_origin, + concrete_die: die, + function_name, + }); + } + _ => {} } - - final_type = shallow_final; } - let (type_name, dwarf_type) = if let Some(t) = final_type.clone() { - (t.type_name(), Some(t)) - } else { - (base_var.type_name.clone(), None) - }; - - let name = if chain.fields.is_empty() { - chain.base.to_string() - } else { - format!("{base}.", base = chain.base) + &chain.fields.join(".") - }; - let var = crate::parser::VariableWithEvaluation { - name, - type_name, - dwarf_type, - evaluation_result: final_eval, - scope_depth: 0, - is_parameter: base_var.is_parameter, - is_artificial: base_var.is_artificial, - }; - tracing::debug!("DWARF:plan_from_var done"); - Ok(Some(var)) + Ok((Some(cu), function, lexical_scopes, inline_chain)) } fn resolve_variables_by_offsets_at_address_with_cfa( @@ -230,9 +288,30 @@ impl LoadedObjfile { get_cfa: Option<&dyn Fn(u64) -> Result>>, function_context: Option<&FunctionBlocks>, cfi_index: Option<&crate::index::CfiIndex>, - ) -> Result> { + ) -> Result> { + let items_with_depths = items + .iter() + .map(|(cu_off, die_off)| (*cu_off, *die_off, 0)) + .collect::>(); + self.resolve_variables_by_offsets_at_address_with_cfa_and_depths( + address, + &items_with_depths, + get_cfa, + function_context, + cfi_index, + ) + } + + fn resolve_variables_by_offsets_at_address_with_cfa_and_depths( + &self, + address: u64, + items: &[(gimli::DebugInfoOffset, gimli::UnitOffset, usize)], + get_cfa: Option<&dyn Fn(u64) -> Result>>, + function_context: Option<&FunctionBlocks>, + cfi_index: Option<&crate::index::CfiIndex>, + ) -> Result> { let mut vars = Vec::with_capacity(items.len()); - for (cu_off, die_off) in items.iter().cloned() { + for (cu_off, die_off, scope_depth) in items.iter().cloned() { let header = self.dwarf.unit_header(cu_off)?; let unit = self.dwarf.unit(header)?; let entry = unit.entry(die_off)?; @@ -244,7 +323,7 @@ impl LoadedObjfile { get_cfa, function_context, cfi_index, - 0, + scope_depth, )? { vars.push(v); } @@ -286,376 +365,142 @@ impl LoadedObjfile { None } - pub(crate) fn get_all_variables_at_address( + pub(crate) fn get_visible_variables_at_address_best_effort_with_diagnostics( &self, + module: crate::ModuleId, address: u64, - ) -> Result> { - let t0 = Instant::now(); - let mut built_funcs: usize = 0; - let mut build_ms: u128 = 0; - tracing::info!( - "DWARF:get_vars module='{}' addr=0x{:x}", - self.module_mapping.path.display(), - address - ); - - if self - .block_index - .read() - .expect("block index lock poisoned") - .find_function_by_pc(address) - .is_none() - { - let b0 = Instant::now(); - if let Some(cu_off) = self.lightweight_index.find_cu_by_address(address) { - let builder = BlockIndexBuilder::new(self.dwarf()); - if let Some(funcs) = builder.build_for_unit(cu_off) { - tracing::info!( - "BlockIndex: built {} functions for CU {:?}", - funcs.len(), - cu_off - ); - let funcs_len = funcs.len(); - if self.add_block_index_functions_if_missing(address, funcs) { - built_funcs += funcs_len; - } - } - } - build_ms = b0.elapsed().as_millis(); - } + ) -> Result<( + Vec, + Vec, + )> { + self.ensure_block_index_for_address(address); - if let Some(func) = self + let func = self .block_index .read() .expect("block index lock poisoned") .find_function_by_pc(address) .cloned() - { - let vars_in_func = func.nodes.iter().map(|n| n.variables.len()).sum::(); - tracing::info!( - "DWARF:get_vars fast_path_hit addr=0x{:x} vars_in_func={} built_funcs={} build_ms={} total_ms={}", - address, - vars_in_func, - built_funcs, - build_ms, - t0.elapsed().as_millis() - ); - let fb_result = self.compute_frame_base_for_pc(&func, address); - let cfa_result = if fb_result.is_none() { - if self.cfi_index.is_some() { - match self.get_cfa_result(address) { - Ok(Some(cfa)) => Some(cfa), - _ => None, - } - } else { - None + .ok_or_else(|| { + anyhow::anyhow!( + "StrictIndex: no function found for address 0x{:x} in block index", + address + ) + })?; + + let fb_result = self.compute_frame_base_for_pc(&func, address); + let cfa_result = if fb_result.is_none() { + if self.cfi_index.is_some() { + match self.get_cfa_result(address) { + Ok(Some(cfa)) => Some(cfa), + _ => None, } } else { None - }; - let get_cfa_closure = move |addr: u64| -> Result> { - if addr == address { - if let Some(fb) = fb_result.clone() { - return Ok(Some(fb)); - } - return Ok(cfa_result.clone()); + } + } else { + None + }; + let get_cfa_closure = move |addr: u64| -> Result> { + if addr == address { + if let Some(fb) = fb_result.clone() { + return Ok(Some(fb)); } - Ok(None) - }; - let var_refs = func.variables_at_pc(address); - if !var_refs.is_empty() { - let items: Vec<(gimli::DebugInfoOffset, gimli::UnitOffset)> = var_refs - .iter() - .map(|v| (v.cu_offset, v.die_offset)) - .collect(); - let cfi_index = self.cfi_index.clone(); - let mut vars = self.resolve_variables_by_offsets_at_address_with_cfa( + return Ok(cfa_result.clone()); + } + Ok(None) + }; + + let var_refs = func.variables_at_pc_with_scope_depth(address); + let cfi_index = self.cfi_index.clone(); + let dwarf_ref = self.dwarf(); + let mut variables = Vec::with_capacity(var_refs.len()); + let mut diagnostics = Vec::new(); + + for (var_ref, scope_depth) in var_refs { + let item = [(var_ref.cu_offset, var_ref.die_offset, scope_depth)]; + let mut resolved = match self + .resolve_variables_by_offsets_at_address_with_cfa_and_depths( address, - &items, + &item, Some(&get_cfa_closure), Some(&func), cfi_index.as_ref(), - )?; - - let dwarf_ref = self.dwarf(); - for (idx, var_out) in vars.iter_mut().enumerate() { - if var_out.dwarf_type.is_none() { - let vr = &var_refs[idx]; - if let Ok(header) = dwarf_ref.unit_header(vr.cu_offset) { - if let Ok(unit) = dwarf_ref.unit(header) { - if let Ok(entry) = unit.entry(vr.die_offset) { - let planner = AccessPlanner::new(dwarf_ref); - if let Ok(Some(type_loc)) = - planner.resolve_type_ref_with_origins_public(&entry, &unit) - { - if let Some(ty) = self.detailed_shallow_type( - type_loc.cu_off, - type_loc.die_off, - ) { - var_out.type_name = ty.type_name(); - var_out.dwarf_type = Some(ty); - } - } - } - } - } - } - } - - // Keep the parser's direct DWARF result for inline parameters as-is. - // We intentionally do not remap optimized inline params from nested - // call_site_parameter DIEs here: those describe the callee's argument - // locations/values, not the inline function's original parameters, and - // using them here caused false aliases like "original_x = RDI". - // TODO(inline_params): If we want to recover optimized inline params in - // these cases, it has to come from a complete DW_OP_entry_value path that - // evaluates caller-side call-site values, not from the inline body's own - // nested call-site subtree. - let mut seen_param_names: std::collections::HashSet = - std::collections::HashSet::new(); - let mut filtered: Vec = - Vec::with_capacity(vars.len()); - for v in vars.into_iter() { - if v.is_parameter { - if seen_param_names.insert(v.name.clone()) { - filtered.push(v); - } - } else { - filtered.push(v); - } - } - - tracing::info!( - "DWARF:get_vars resolved {} vars total_ms={}", - filtered.len(), - t0.elapsed().as_millis() - ); - return Ok(filtered); - } - } - - Err(anyhow::anyhow!( - "StrictIndex: no function found for address 0x{:x} in block index", - address - )) - } - - pub(crate) fn plan_chain_access( - &self, - address: u64, - base_var: &str, - chain: &[String], - ) -> Result> { - let t0 = Instant::now(); - let mut built_funcs: usize = 0; - let mut build_ms: u128 = 0; - tracing::info!( - "DWARF:plan_chain module='{}' addr=0x{:x} base='{}' chain_len={}", - self.module_mapping.path.display(), - address, - base_var, - chain.len() - ); - - if self - .block_index - .read() - .expect("block index lock poisoned") - .find_function_by_pc(address) - .is_none() - { - let b0 = Instant::now(); - let builder = BlockIndexBuilder::new(self.dwarf()); - if let Some(func_entry) = self.find_function_index_entry_by_address(address) { - if let Some(fb) = - builder.build_for_function(func_entry.unit_offset, func_entry.die_offset) - { - if self.add_block_index_functions_if_missing(address, vec![fb]) { - built_funcs += 1; - } - } - } else if let Some(cu_off) = self.lightweight_index.find_cu_by_address(address) { - if let Some(funcs) = builder.build_for_unit(cu_off) { - let funcs_len = funcs.len(); - if self.add_block_index_functions_if_missing(address, funcs) { - built_funcs += funcs_len; - } - } - } - build_ms = b0.elapsed().as_millis(); - } - - if let Some(func) = self - .block_index - .read() - .expect("block index lock poisoned") - .find_function_by_pc(address) - .cloned() - { - let cfa_result = if self.cfi_index.is_some() { - match self.get_cfa_result(address) { - Ok(Some(cfa)) => Some(cfa), - _ => None, + ) { + Ok(vars) => vars, + Err(error) => { + let detail = error.to_string(); + diagnostics.push(VariableQueryDiagnostic { + pc: address, + name: self.variable_name_for_ref(&var_ref), + scope_depth, + availability: Self::variable_eval_error_availability(&error, &detail), + detail, + }); + tracing::debug!( + "Skipping visible variable at 0x{:x} due to DWARF evaluation error: {}", + address, + error + ); + continue; } - } else { - None }; - let get_cfa_closure = move |addr: u64| -> Result> { - if addr == address { - Ok(cfa_result.clone()) - } else { - Ok(None) - } + + let Some(mut variable) = resolved.pop() else { + continue; }; + variable.declaration = Some(die_ref(module, var_ref.cu_offset, var_ref.die_offset)); - let dwarf = self.dwarf(); - let header = dwarf.unit_header(func.cu_offset)?; - let unit = dwarf.unit(header)?; - let candidates = func.variables_at_pc(address); - tracing::info!( - "DWARF:plan_chain fast_path_hit addr=0x{:x} candidates={} built_funcs={} build_ms={}", - address, - candidates.len(), - built_funcs, - build_ms - ); - let mut cand_names: Vec = Vec::new(); - for v in &candidates { - let e = unit.entry(v.die_offset)?; - if let Some(name) = resolve_name_with_origins(dwarf, &unit, &e)? { - cand_names.push(name); - } - } - tracing::info!("DWARF:plan_chain candidates_names={:?}", cand_names); - - for v in candidates { - let e = unit.entry(v.die_offset)?; - if let Some(n) = resolve_name_with_origins(dwarf, &unit, &e)? { - if n == base_var || n.starts_with(&format!("{base_var}@")) { - if chain.is_empty() { - let one = vec![(func.cu_offset, v.die_offset)]; - let t1 = Instant::now(); - let cfi_index = self.cfi_index.clone(); - let vars = self.resolve_variables_by_offsets_at_address_with_cfa( - address, - &one, - Some(&get_cfa_closure), - Some(&func), - cfi_index.as_ref(), - )?; - let mut var_opt = vars.into_iter().next(); - let mut type_ms = 0u128; - if let Some(ref mut var0) = var_opt { - if var0.dwarf_type.is_none() { - let dwarf = self.dwarf(); - let header = dwarf.unit_header(func.cu_offset)?; - let unit = dwarf.unit(header)?; - let e = unit.entry(v.die_offset)?; - let planner = AccessPlanner::new(dwarf); - if let Some(type_loc) = - planner.resolve_type_ref_with_origins_public(&e, &unit)? - { - let tstart = Instant::now(); - if let Some(ty) = self.detailed_shallow_type( - type_loc.cu_off, - type_loc.die_off, - ) { - type_ms = tstart.elapsed().as_millis(); - var0.type_name = ty.type_name(); - var0.dwarf_type = Some(ty); - } - } + if let Ok(header) = dwarf_ref.unit_header(var_ref.cu_offset) { + if let Ok(unit) = dwarf_ref.unit(header) { + if let Ok(entry) = unit.entry(var_ref.die_offset) { + if let Ok(Some(type_loc)) = + resolve_type_ref_with_origins(dwarf_ref, &entry, &unit) + { + variable.type_id = + Some(type_id(module, type_loc.cu_off, type_loc.die_off)); + if variable.dwarf_type.is_none() { + if let Some(ty) = + self.detailed_shallow_type(type_loc.cu_off, type_loc.die_off) + { + variable.type_name = ty.type_name(); + variable.dwarf_type = Some(ty); } } - tracing::info!( - "DWARF:plan_chain var_match='{}' resolve_base_ms={} type_ms={} total_ms={}", - n, - t1.elapsed().as_millis(), - type_ms, - t0.elapsed().as_millis() - ); - return Ok(var_opt); } - - let t1 = Instant::now(); - let res = self.plan_chain_access_from_var( - address, - func.cu_offset, - func.die_offset, - v.die_offset, - ChainSpec { - base: base_var, - fields: chain, - }, - VariableEvalContext { - get_cfa: Some(&get_cfa_closure), - function_context: Some(&func), - }, - )?; - tracing::info!( - "DWARF:plan_chain var_match='{}' plan_ms={} total_ms={}", - n, - t1.elapsed().as_millis(), - t0.elapsed().as_millis() - ); - return Ok(res); } } } - } - let globals = self.find_global_variables_by_name(base_var); - if !globals.is_empty() { - for info in globals { - match self.plan_chain_access_from_var( - address, - info.unit_offset, - info.die_offset, - info.die_offset, - ChainSpec { - base: base_var, - fields: chain, - }, - VariableEvalContext { - get_cfa: None, - function_context: None, - }, - ) { - Ok(Some(v)) => { - tracing::info!( - "DWARF:plan_chain(global) success base='{}' total_ms={}", - base_var, - t0.elapsed().as_millis() - ); - return Ok(Some(v)); - } - Ok(None) => continue, - Err(e) => { - tracing::debug!( - "DWARF:plan_chain(global) candidate failed for base='{}': {}", - base_var, - e - ); - continue; - } - } - } + variables.push(variable); } - let err = anyhow::anyhow!( - "StrictIndex: no function found for address 0x{:x} or no matching base var '{}' (plan_chain)", - address, - base_var - ); - tracing::info!( - "DWARF:plan_chain miss addr=0x{:x} built_funcs={} build_ms={} total_ms={} err={}", - address, - built_funcs, - build_ms, - t0.elapsed().as_millis(), - err - ); - Err(err) + let mut seen_param_names = std::collections::HashSet::new(); + variables.retain(|variable| { + !variable.is_parameter + || seen_param_names.insert((variable.name.clone(), variable.scope_depth)) + }); + Ok((variables, diagnostics)) + } + + fn variable_name_for_ref(&self, var_ref: &VarRef) -> Option { + let dwarf = self.dwarf(); + let header = dwarf.unit_header(var_ref.cu_offset).ok()?; + let unit = dwarf.unit(header).ok()?; + let entry = unit.entry(var_ref.die_offset).ok()?; + resolve_name_with_origins(dwarf, &unit, &entry) + .ok() + .flatten() + } + + fn variable_eval_error_availability(error: &anyhow::Error, detail: &str) -> Availability { + if let Some(op) = crate::dwarf_expr::ops::unsupported_op_from_error(error) { + Availability::Unsupported(UnsupportedReason::DwarfOp { op: op.to_string() }) + } else { + Availability::Unsupported(UnsupportedReason::ExpressionShape { + detail: detail.to_string(), + }) + } } fn compute_frame_base_for_pc( @@ -792,6 +637,19 @@ impl LoadedObjfile { let dwarf = self.dwarf(); let header = dwarf.unit_header(cu_off).ok()?; let unit = dwarf.unit(header).ok()?; + let entry = unit.entry(die_off).ok()?; + if let Some((def_cu_off, def_die_off)) = + complete_aggregate_declaration_entry(dwarf, &self.type_name_index, &unit, &entry) + { + let def_header = dwarf.unit_header(def_cu_off).ok()?; + let def_unit = dwarf.unit(def_header).ok()?; + return crate::parser::DetailedParser::resolve_type_shallow_at_offset( + dwarf, + &def_unit, + def_die_off, + ); + } + crate::parser::DetailedParser::resolve_type_shallow_at_offset(dwarf, &unit, die_off) } @@ -804,8 +662,7 @@ impl LoadedObjfile { let header = dwarf.unit_header(cu_off).ok()?; let unit = dwarf.unit(header).ok()?; let entry = unit.entry(die_off).ok()?; - let planner = AccessPlanner::new(dwarf); - match planner.resolve_type_ref_with_origins_public(&entry, &unit) { + match resolve_type_ref_with_origins(dwarf, &entry, &unit) { Ok(Some(type_loc)) => self.detailed_shallow_type(type_loc.cu_off, type_loc.die_off), _ => None, } @@ -815,7 +672,332 @@ impl LoadedObjfile { &self, address: u64, items: &[(gimli::DebugInfoOffset, gimli::UnitOffset)], - ) -> Result> { + ) -> Result> { self.resolve_variables_by_offsets_at_address_with_cfa(address, items, None, None, None) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::binary::{dwarf_reader_from_arc, DwarfReader}; + use crate::core::{FunctionDieKind, IndexEntry, IndexFlags}; + use crate::index::{LightweightIndex, TypeNameIndex}; + use gimli::constants; + use gimli::write::{ + AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec, LineProgram, + Sections, Unit, + }; + use gimli::{DebugInfoOffset, Format, LittleEndian}; + use std::collections::HashMap; + use std::sync::Arc; + + type AggregateFixture = ( + gimli::Dwarf, + gimli::Unit, + gimli::UnitOffset, + DebugInfoOffset, + gimli::UnitOffset, + Arc, + ); + + fn build_declaration_completion_fixture() -> AggregateFixture { + let encoding = gimli::Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + + let mut dwarf = WriteDwarf::new(); + let decl_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); + let def_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); + + { + let unit = dwarf.units.get_mut(decl_unit_id); + let root = unit.root(); + + let struct_id = unit.add(root, constants::DW_TAG_structure_type); + let struct_entry = unit.get_mut(struct_id); + struct_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"Foo".to_vec()), + ); + struct_entry.set( + constants::DW_AT_declaration, + WriteAttributeValue::Flag(true), + ); + + let sibling_id = unit.add(root, constants::DW_TAG_subprogram); + let sibling = unit.get_mut(sibling_id); + sibling.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"later_sibling".to_vec()), + ); + } + + { + let unit = dwarf.units.get_mut(def_unit_id); + let root = unit.root(); + + let int_id = unit.add(root, constants::DW_TAG_base_type); + let int_entry = unit.get_mut(int_id); + int_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"int".to_vec()), + ); + int_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); + int_entry.set( + constants::DW_AT_encoding, + WriteAttributeValue::Encoding(constants::DW_ATE_signed), + ); + + let struct_id = unit.add(root, constants::DW_TAG_structure_type); + let struct_entry = unit.get_mut(struct_id); + struct_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"Foo".to_vec()), + ); + struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); + + let member_id = unit.add(struct_id, constants::DW_TAG_member); + let member = unit.get_mut(member_id); + member.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"x".to_vec()), + ); + member.set(constants::DW_AT_type, WriteAttributeValue::UnitRef(int_id)); + member.set( + constants::DW_AT_data_member_location, + WriteAttributeValue::Data1(0), + ); + } + + build_fixture_from_dwarf(dwarf, true, false) + } + + fn build_empty_definition_fixture() -> AggregateFixture { + let encoding = gimli::Encoding { + format: Format::Dwarf32, + version: 4, + address_size: 8, + }; + + let mut dwarf = WriteDwarf::new(); + let empty_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); + let full_unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); + + { + let unit = dwarf.units.get_mut(empty_unit_id); + let root = unit.root(); + + let struct_id = unit.add(root, constants::DW_TAG_structure_type); + let struct_entry = unit.get_mut(struct_id); + struct_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"Foo".to_vec()), + ); + struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(1)); + } + + { + let unit = dwarf.units.get_mut(full_unit_id); + let root = unit.root(); + + let int_id = unit.add(root, constants::DW_TAG_base_type); + let int_entry = unit.get_mut(int_id); + int_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"int".to_vec()), + ); + int_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); + int_entry.set( + constants::DW_AT_encoding, + WriteAttributeValue::Encoding(constants::DW_ATE_signed), + ); + + let struct_id = unit.add(root, constants::DW_TAG_structure_type); + let struct_entry = unit.get_mut(struct_id); + struct_entry.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"Foo".to_vec()), + ); + struct_entry.set(constants::DW_AT_byte_size, WriteAttributeValue::Data1(4)); + + let member_id = unit.add(struct_id, constants::DW_TAG_member); + let member = unit.get_mut(member_id); + member.set( + constants::DW_AT_name, + WriteAttributeValue::String(b"x".to_vec()), + ); + member.set(constants::DW_AT_type, WriteAttributeValue::UnitRef(int_id)); + member.set( + constants::DW_AT_data_member_location, + WriteAttributeValue::Data1(0), + ); + } + + build_fixture_from_dwarf(dwarf, false, false) + } + + fn build_fixture_from_dwarf( + mut dwarf: WriteDwarf, + source_is_declaration: bool, + source_has_children: bool, + ) -> AggregateFixture { + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + dwarf.write(&mut sections).unwrap(); + + let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(|id| { + Ok::<_, gimli::Error>( + sections + .get(id) + .map(|section| section.slice().to_vec()) + .unwrap_or_default(), + ) + }) + .unwrap(); + let read_dwarf = dwarf_sections + .borrow(|section| dwarf_reader_from_arc(Arc::<[u8]>::from(section.as_slice()))); + + let mut units = read_dwarf.units(); + let source_header = units.next().unwrap().unwrap(); + let full_header = units.next().unwrap().unwrap(); + let source_cu_off = source_header.debug_info_offset().unwrap(); + let full_cu_off = full_header.debug_info_offset().unwrap(); + + let source_unit = read_dwarf.unit(source_header).unwrap(); + let full_unit = read_dwarf.unit(full_header).unwrap(); + let source_struct_off = find_struct_offset( + &read_dwarf, + &source_unit, + "Foo", + source_is_declaration, + source_has_children, + ); + let full_struct_off = find_struct_offset(&read_dwarf, &full_unit, "Foo", false, true); + + let mut types = HashMap::new(); + types.insert( + "Foo".to_string(), + vec![IndexEntry { + name: Arc::from("Foo"), + die_offset: full_struct_off, + unit_offset: full_cu_off, + tag: constants::DW_TAG_structure_type, + flags: IndexFlags::default(), + language: None, + representative_addr: None, + entry_pc: None, + function_kind: FunctionDieKind::NotFunction, + }], + ); + let type_index = Arc::new(TypeNameIndex::build_from_lightweight( + &LightweightIndex::from_builder_data(HashMap::new(), HashMap::new(), types), + )); + + ( + read_dwarf, + source_unit, + source_struct_off, + if source_is_declaration { + full_cu_off + } else { + source_cu_off + }, + full_struct_off, + type_index, + ) + } + + fn find_struct_offset( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + expected_name: &str, + expected_is_declaration: bool, + expected_has_children: bool, + ) -> gimli::UnitOffset { + let mut entries = unit.entries(); + while let Some(entry) = entries.next_dfs().unwrap() { + if entry.tag() != constants::DW_TAG_structure_type { + continue; + } + let Some(attr) = entry.attr(constants::DW_AT_name) else { + continue; + }; + let Ok(name) = dwarf.attr_string(unit, attr.value()) else { + continue; + }; + let Ok(name) = name.to_string_lossy() else { + continue; + }; + let is_declaration = matches!( + entry.attr(constants::DW_AT_declaration), + Some(attr) if matches!(attr.value(), gimli::AttributeValue::Flag(true)) + ); + if name == expected_name + && is_declaration == expected_is_declaration + && entry.has_children() == expected_has_children + { + return entry.offset(); + } + } + panic!( + "missing struct {expected_name} with declaration={expected_is_declaration} \ + and has_children={expected_has_children}" + ); + } + + fn legacy_has_children_via_next_dfs( + unit: &gimli::Unit, + die: &gimli::DebuggingInformationEntry, + ) -> bool { + let mut entries = unit.entries_at_offset(die.offset()).unwrap(); + let _ = entries.next_entry().unwrap(); + entries.next_dfs().unwrap().is_some() + } + + #[test] + fn aggregate_completion_uses_declaration_flag_despite_later_siblings() { + let (dwarf, decl_unit, decl_struct_off, def_cu_off, def_struct_off, type_index) = + build_declaration_completion_fixture(); + let decl_struct_die = decl_unit.entry(decl_struct_off).unwrap(); + let mut legacy_cursor = decl_unit.entries_at_offset(decl_struct_off).unwrap(); + assert!(legacy_cursor.next_entry().unwrap()); + let next_after_decl = legacy_cursor.next_dfs().unwrap().unwrap(); + + assert!(!decl_struct_die.has_children()); + assert_eq!(next_after_decl.depth(), 0); + assert_eq!(next_after_decl.tag(), constants::DW_TAG_subprogram); + assert!(legacy_has_children_via_next_dfs( + &decl_unit, + &decl_struct_die + )); + + let resolved = + complete_aggregate_declaration_entry(&dwarf, &type_index, &decl_unit, &decl_struct_die); + + assert_eq!(resolved, Some((def_cu_off, def_struct_off))); + } + + #[test] + fn aggregate_completion_does_not_rebind_empty_definitions() { + let (dwarf, empty_unit, empty_struct_off, _source_cu_off, full_struct_off, type_index) = + build_empty_definition_fixture(); + let empty_struct_die = empty_unit.entry(empty_struct_off).unwrap(); + + assert!(!empty_struct_die.has_children()); + assert!(empty_struct_die + .attr(constants::DW_AT_declaration) + .is_none()); + + let resolved = complete_aggregate_declaration_entry( + &dwarf, + &type_index, + &empty_unit, + &empty_struct_die, + ); + + assert_eq!(resolved, None); + assert_ne!(empty_struct_off, full_struct_off); + } +} diff --git a/ghostscope-dwarf/src/parser/detailed_parser.rs b/ghostscope-dwarf/src/parser/detailed_parser.rs index d6d2a0ef..e067c806 100644 --- a/ghostscope-dwarf/src/parser/detailed_parser.rs +++ b/ghostscope-dwarf/src/parser/detailed_parser.rs @@ -7,11 +7,13 @@ use crate::{ binary::DwarfReader, - core::{attr_u64, EvaluationResult, Result}, + core::{attr_u64, Availability, EvaluationResult, Result, VariableLocation}, dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, index::{CfiIndex, FunctionBlocks}, parser::ExpressionEvaluator, - semantics::{resolve_name_with_origins, resolve_type_ref_in_same_unit_with_origins}, + semantics::{ + resolve_name_with_origins, resolve_type_ref_in_same_unit_with_origins, VisibleVariable, + }, TypeInfo, }; use gimli::Reader; @@ -29,18 +31,45 @@ use gimli::constants::{ use std::collections::HashSet; // no tracing imports needed here -/// Variable with complete information including EvaluationResult +/// Internal variable record produced before semantic planning. #[derive(Debug, Clone)] pub struct VariableWithEvaluation { pub name: String, pub type_name: String, pub dwarf_type: Option, + pub declaration: Option, + pub type_id: Option, pub evaluation_result: EvaluationResult, pub scope_depth: usize, pub is_parameter: bool, pub is_artificial: bool, } +impl VariableWithEvaluation { + pub fn semantic_location(&self) -> VariableLocation { + VariableLocation::from(&self.evaluation_result) + } + + pub fn availability(&self) -> Availability { + Availability::from(&self.evaluation_result) + } + + pub fn visible_variable(&self) -> VisibleVariable { + VisibleVariable { + name: self.name.clone(), + type_name: self.type_name.clone(), + dwarf_type: self.dwarf_type.clone(), + declaration: self.declaration, + type_id: self.type_id, + location: self.semantic_location(), + availability: self.availability(), + scope_depth: self.scope_depth, + is_parameter: self.is_parameter, + is_artificial: self.is_artificial, + } + } +} + // Removed full traversal request/context types in shallow mode /// Detailed DWARF parser for tree traversal and variable collection @@ -769,6 +798,8 @@ impl DetailedParser { name, type_name, dwarf_type, + declaration: None, + type_id: None, evaluation_result, scope_depth, is_parameter, @@ -904,3 +935,51 @@ impl Default for DetailedParser { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{Availability, DirectValueResult, VariableLocation}; + + #[test] + fn variable_reports_register_value_location() { + let variable = VariableWithEvaluation { + name: "arg".to_string(), + type_name: "int".to_string(), + dwarf_type: None, + declaration: None, + type_id: None, + evaluation_result: EvaluationResult::DirectValue(DirectValueResult::RegisterValue(5)), + scope_depth: 1, + is_parameter: true, + is_artificial: false, + }; + + assert_eq!(variable.name, "arg"); + assert_eq!(variable.availability(), Availability::Available); + assert_eq!( + variable.semantic_location(), + VariableLocation::RegisterValue { dwarf_reg: 5 } + ); + assert!(variable.is_parameter); + assert!(!variable.is_artificial); + } + + #[test] + fn variable_reports_optimized_out_location() { + let variable = VariableWithEvaluation { + name: "local".to_string(), + type_name: "int".to_string(), + dwarf_type: None, + declaration: None, + type_id: None, + evaluation_result: EvaluationResult::Optimized, + scope_depth: 2, + is_parameter: false, + is_artificial: false, + }; + + assert_eq!(variable.availability(), Availability::OptimizedOut); + assert_eq!(variable.semantic_location(), VariableLocation::OptimizedOut); + } +} diff --git a/ghostscope-dwarf/src/parser/fast_parser.rs b/ghostscope-dwarf/src/parser/fast_parser.rs index 01fa286f..90179676 100644 --- a/ghostscope-dwarf/src/parser/fast_parser.rs +++ b/ghostscope-dwarf/src/parser/fast_parser.rs @@ -1015,8 +1015,6 @@ impl<'a> DwarfParser<'a> { column, is_stmt: line_row.is_stmt(), prologue_end: line_row.prologue_end(), - epilogue_begin: line_row.epilogue_begin(), - end_sequence: line_row.end_sequence(), }); } } diff --git a/ghostscope-dwarf/src/parser/mod.rs b/ghostscope-dwarf/src/parser/mod.rs index d7be8d5b..6b19a5a2 100644 --- a/ghostscope-dwarf/src/parser/mod.rs +++ b/ghostscope-dwarf/src/parser/mod.rs @@ -5,8 +5,8 @@ pub(crate) mod fast_parser; pub(crate) mod range_extractor; // Full type resolver removed; shallow resolver lives in detailed_parser -// Re-export what's needed externally -pub use detailed_parser::VariableWithEvaluation; +// Internal parser output consumed by the semantic planning layer. +pub(crate) use detailed_parser::VariableWithEvaluation; // Internal re-exports for crate use pub(crate) use crate::dwarf_expr::ExpressionEvaluator; diff --git a/ghostscope-dwarf/src/semantics/mod.rs b/ghostscope-dwarf/src/semantics/mod.rs index c38a596f..76f25a0e 100644 --- a/ghostscope-dwarf/src/semantics/mod.rs +++ b/ghostscope-dwarf/src/semantics/mod.rs @@ -1,3 +1,7 @@ +pub mod pc_context; +pub mod unwind_plan; +pub mod variable_plan; + pub(crate) mod origins; pub(crate) mod pc; pub(crate) mod types; @@ -6,7 +10,7 @@ pub(crate) use origins::{ resolve_attr_with_unit_origins, resolve_name_with_origins, resolve_origin_entry, }; pub(crate) use pc::{range_contains_pc, ranges_contain_pc}; -pub(crate) use types::{ - resolve_type_ref_in_same_unit_with_origins, resolve_type_ref_with_origins, - strip_typedef_qualified, TypeLoc, -}; +pub use pc_context::*; +pub(crate) use types::{resolve_type_ref_in_same_unit_with_origins, resolve_type_ref_with_origins}; +pub use unwind_plan::*; +pub use variable_plan::*; diff --git a/ghostscope-dwarf/src/semantics/pc_context.rs b/ghostscope-dwarf/src/semantics/pc_context.rs new file mode 100644 index 00000000..28fa604a --- /dev/null +++ b/ghostscope-dwarf/src/semantics/pc_context.rs @@ -0,0 +1,63 @@ +//! PC-centered semantic context types. + +use crate::core::{CuId, DieRef, FunctionId, InlineContextId, ModuleId, ScopeId}; +use std::path::PathBuf; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PcContext { + pub module: ModuleId, + /// Runtime PC in the target process address space. + pub pc: u64, + /// Module-normalized PC used for DWARF and object-file queries. + pub normalized_pc: u64, + pub cu: Option, + pub function: Option, + /// Best-effort display name until stable function DIE ids are wired in. + pub function_name: Option, + pub lexical_scopes: Vec, + pub inline_chain: Vec, + /// Best-effort inline classification until inline DIE chains are exposed. + pub is_inline: Option, + pub line: Option, + pub address_space: AddressSpaceInfo, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InlineFrame { + pub context: Option, + pub call_site: Option, + pub abstract_origin: Option, + pub concrete_die: DieRef, + pub function_name: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PcLineInfo { + pub file_path: String, + pub line_number: u32, + pub column: Option, + pub address: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AddressSpaceInfo { + pub module_path: Option, + pub runtime_base: Option, + pub link_base: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PcRange { + pub start: u64, + pub end: u64, +} + +impl PcRange { + pub fn contains(&self, pc: u64) -> bool { + if self.start == self.end { + pc == self.start + } else { + pc >= self.start && pc < self.end + } + } +} diff --git a/ghostscope-dwarf/src/semantics/types.rs b/ghostscope-dwarf/src/semantics/types.rs index 94a58a50..f0741f19 100644 --- a/ghostscope-dwarf/src/semantics/types.rs +++ b/ghostscope-dwarf/src/semantics/types.rs @@ -114,27 +114,3 @@ pub(crate) fn resolve_type_ref_in_same_unit_with_origins( .filter(|loc| loc.cu_off == unit_cu_off) .map(|loc| loc.die_off)) } - -pub(crate) fn strip_typedef_qualified( - dwarf: &gimli::Dwarf, - mut type_loc: TypeLoc, -) -> crate::core::Result { - loop { - let header = dwarf.unit_header(type_loc.cu_off)?; - let unit = dwarf.unit(header)?; - let die = unit.entry(type_loc.die_off)?; - match die.tag() { - gimli::DW_TAG_typedef - | gimli::DW_TAG_const_type - | gimli::DW_TAG_volatile_type - | gimli::DW_TAG_restrict_type => { - if let Some(next) = resolve_type_ref_with_origins(dwarf, &die, &unit)? { - type_loc = next; - continue; - } - } - _ => {} - } - return Ok(type_loc); - } -} diff --git a/ghostscope-dwarf/src/semantics/unwind_plan.rs b/ghostscope-dwarf/src/semantics/unwind_plan.rs new file mode 100644 index 00000000..6ad978ad --- /dev/null +++ b/ghostscope-dwarf/src/semantics/unwind_plan.rs @@ -0,0 +1,120 @@ +use crate::core::{ComputeStep, ModuleId}; + +/// A compact, row-oriented unwind table derived from DWARF CFI. +#[derive(Debug, Clone, PartialEq)] +pub struct CompactUnwindTable { + pub module: ModuleId, + pub rows: Vec, + pub diagnostics: Vec, +} + +impl CompactUnwindTable { + pub fn row_for_pc(&self, pc: u64) -> Option<&CompactUnwindRow> { + self.rows + .iter() + .find(|row| row.pc_start <= pc && pc < row.pc_end) + } + + pub fn stats(&self) -> CompactUnwindStats { + let bpf_supported_rows = self.rows.iter().filter(|row| row.bpf_supported).count(); + CompactUnwindStats { + row_count: self.rows.len(), + bpf_supported_rows, + unsupported_rows: self.rows.len().saturating_sub(bpf_supported_rows), + diagnostic_count: self.diagnostics.len(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CompactUnwindStats { + pub row_count: usize, + pub bpf_supported_rows: usize, + pub unsupported_rows: usize, + pub diagnostic_count: usize, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CompactUnwindRow { + pub module: ModuleId, + pub pc_start: u64, + pub pc_end: u64, + pub cfa: CfaRulePlan, + pub return_address_register: u16, + pub return_address: RegisterRecoveryPlan, + pub sp: Option, + pub rbp: Option, + pub bpf_supported: bool, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum CfaRulePlan { + RegPlusOffset { register: u16, offset: i64 }, + Expression { steps: Vec }, + Unsupported { reason: String }, +} + +impl CfaRulePlan { + pub fn is_bpf_fast_path_supported(&self) -> bool { + matches!( + self, + Self::RegPlusOffset { + register: 6 | 7, + .. + } + ) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum RegisterRecoveryPlan { + Undefined, + SameValue { + register: u16, + }, + Register { + register: u16, + }, + AtCfaOffset { + offset: i64, + }, + ValCfaOffset { + offset: i64, + }, + Constant { + value: u64, + }, + Expression { + steps: Vec, + dereference: bool, + }, + Unsupported { + reason: String, + }, +} + +impl RegisterRecoveryPlan { + pub fn is_bpf_fast_path_supported(&self) -> bool { + matches!( + self, + Self::SameValue { .. } + | Self::Register { .. } + | Self::AtCfaOffset { .. } + | Self::ValCfaOffset { .. } + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UnwindDiagnostic { + pub pc_start: u64, + pub pc_end: u64, + pub kind: UnwindDiagnosticKind, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum UnwindDiagnosticKind { + UnsupportedCfaRule { reason: String }, + UnsupportedRegisterRule { register: u16, reason: String }, + MissingReturnAddressRule { register: u16 }, +} diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs new file mode 100644 index 00000000..5a2405ad --- /dev/null +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -0,0 +1,1234 @@ +//! Variable semantic plans before runtime-specific lowering. + +use crate::core::{ + AddressExpr, Availability, ComputeStep, DieRef, HelperMode, InlineContextId, MemoryAccessSize, + Provenance, Result, RuntimeCapabilities, RuntimeRequirement, TypeId, UnsupportedReason, + VariableId, VariableLocation, VerifierRisk, +}; +use crate::semantics::PcRange; +use crate::TypeInfo; + +/// Owned semantic view returned by PC-context variable queries. +#[derive(Debug, Clone, PartialEq)] +pub struct VisibleVariable { + pub name: String, + pub type_name: String, + pub dwarf_type: Option, + pub declaration: Option, + pub type_id: Option, + pub location: VariableLocation, + pub availability: Availability, + pub scope_depth: usize, + pub is_parameter: bool, + pub is_artificial: bool, +} + +/// Diagnostic produced while answering a PC-sensitive variable query. +#[derive(Debug, Clone, PartialEq)] +pub struct VariableQueryDiagnostic { + pub pc: u64, + pub name: Option, + pub scope_depth: usize, + pub availability: Availability, + pub detail: String, +} + +/// Visible variables plus non-fatal diagnostics from best-effort discovery. +#[derive(Debug, Clone, PartialEq)] +pub struct VisibleVariablesResult { + pub variables: Vec, + pub diagnostics: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VariableLoweringKind { + DirectValue, + UserMemoryRead, + Composite, + Unavailable, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VariableLoweringPlan { + pub kind: VariableLoweringKind, + pub availability: Availability, + pub requirements: Vec, + pub helper_mode: HelperMode, + pub required_registers: Vec, + pub estimated_stack_bytes: usize, + pub verifier_risk: VerifierRisk, +} + +/// Owned, PC-sensitive variable read plan before runtime-specific lowering. +#[derive(Debug, Clone, PartialEq)] +pub struct VariableReadPlan { + pub name: String, + pub type_name: String, + pub dwarf_type: Option, + pub declaration: Option, + pub type_id: Option, + pub location: VariableLocation, + pub availability: Availability, + pub scope_depth: usize, + pub is_parameter: bool, + pub is_artificial: bool, + pub pc_range: Option, + pub inline_context: Option, + pub provenance: Provenance, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VariableAccessPath { + pub segments: Vec, +} + +impl VariableAccessPath { + pub fn new(segments: Vec) -> Self { + Self { segments } + } + + pub fn fields(fields: impl IntoIterator>) -> Self { + Self { + segments: fields + .into_iter() + .map(|field| VariableAccessSegment::Field(field.into())) + .collect(), + } + } + + fn suffix(&self) -> String { + let mut suffix = String::new(); + for segment in &self.segments { + match segment { + VariableAccessSegment::Field(field) => { + suffix.push('.'); + suffix.push_str(field); + } + VariableAccessSegment::ArrayIndex(index) => { + suffix.push('['); + suffix.push_str(&index.to_string()); + suffix.push(']'); + } + VariableAccessSegment::Dereference => suffix.push_str(".*"), + } + } + suffix + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VariableAccessSegment { + Field(String), + ArrayIndex(i64), + Dereference, +} + +#[derive(Debug, thiserror::Error)] +pub enum PlanError { + #[error("Variable '{name}' has no DWARF type information for access planning")] + MissingTypeInfo { name: String }, + + #[error("Unknown member '{field}' in {kind} '{type_name}' (known members: {members})")] + UnknownMember { + kind: &'static str, + type_name: String, + field: String, + members: String, + }, + + #[error("array access requires array or pointer type, got '{type_name}'")] + InvalidArrayAccess { type_name: String }, + + #[error("pointer dereference requires pointer type, got '{type_name}'")] + InvalidPointerDereference { type_name: String }, + + #[error( + "cannot apply byte offset {offset} to value-backed aggregate location {location:?}; field/array extraction from aggregate values is not implemented" + )] + ValueBackedAggregateOffset { + offset: i64, + location: VariableLocation, + }, + + #[error("cannot dereference variable location shape {location:?}")] + UnsupportedDereference { location: VariableLocation }, +} + +impl PlanError { + pub fn is_value_backed_aggregate_access(&self) -> bool { + matches!(self, PlanError::ValueBackedAggregateOffset { .. }) + } +} + +impl VariableReadPlan { + pub fn from_visible_variable(variable: VisibleVariable, provenance: Provenance) -> Self { + Self { + name: variable.name, + type_name: variable.type_name, + dwarf_type: variable.dwarf_type, + declaration: variable.declaration, + type_id: variable.type_id, + location: variable.location, + availability: variable.availability, + scope_depth: variable.scope_depth, + is_parameter: variable.is_parameter, + is_artificial: variable.is_artificial, + pc_range: None, + inline_context: None, + provenance, + } + } + + pub fn bpf_lowering_plan(&self, capabilities: &RuntimeCapabilities) -> VariableLoweringPlan { + if !self.availability.is_available() { + return VariableLoweringPlan { + kind: VariableLoweringKind::Unavailable, + availability: self.availability.clone(), + requirements: Vec::new(), + helper_mode: HelperMode::NoUserMemoryRead, + required_registers: Vec::new(), + estimated_stack_bytes: 0, + verifier_risk: VerifierRisk::Unsupported { + reason: "variable is unavailable".to_string(), + }, + }; + } + + let kind = self.location.lowering_kind(); + let mut requirements = self.location.runtime_requirements(); + requirements.sort_by_key(requirement_rank); + requirements.dedup(); + let mut required_registers = self.location.required_registers(); + required_registers.sort_unstable(); + required_registers.dedup(); + let estimated_stack_bytes = self.location.estimated_stack_bytes(); + let helper_mode = helper_mode_for_requirements(&requirements, capabilities); + let verifier_risk = + verifier_risk_for_requirements(&requirements, estimated_stack_bytes, capabilities); + + let availability = match &verifier_risk { + VerifierRisk::StackBudgetExceeded { estimated, max } => { + Availability::Unsupported(UnsupportedReason::ExpressionShape { + detail: format!( + "estimated BPF stack use {estimated} bytes exceeds capability limit {max}" + ), + }) + } + _ => requirements + .iter() + .find(|requirement| !capabilities.supports_requirement(requirement)) + .cloned() + .map(Availability::Requires) + .unwrap_or(Availability::Available), + }; + + VariableLoweringPlan { + kind, + availability, + requirements, + helper_mode, + required_registers, + estimated_stack_bytes, + verifier_risk, + } + } + + pub fn plan_access_path(&self, path: &VariableAccessPath) -> Result { + let mut plan = self.clone(); + for segment in &path.segments { + plan = plan.plan_access_segment(segment)?; + } + + plan.name.push_str(&path.suffix()); + Ok(plan) + } + + fn plan_access_segment(&self, segment: &VariableAccessSegment) -> Result { + let dwarf_type = self + .dwarf_type + .clone() + .ok_or_else(|| PlanError::MissingTypeInfo { + name: self.name.clone(), + })?; + + match segment { + VariableAccessSegment::Field(field) => self.plan_field_access(&dwarf_type, field), + VariableAccessSegment::ArrayIndex(index) => self.plan_array_index(&dwarf_type, *index), + VariableAccessSegment::Dereference => self.plan_pointer_deref(&dwarf_type), + } + } + + fn plan_field_access(&self, dwarf_type: &TypeInfo, field: &str) -> Result { + let (base_location, aggregate_type) = match strip_alias_type(dwarf_type) { + TypeInfo::PointerType { target_type, .. } => ( + dereference_location(&self.location)?, + strip_alias_type(target_type).clone(), + ), + ty => (self.location.clone(), ty.clone()), + }; + + let member = match strip_alias_type(&aggregate_type) { + TypeInfo::StructType { name, members, .. } => members + .iter() + .find(|member| member.name == field) + .cloned() + .ok_or_else(|| unknown_member_error("struct", name, field, members))?, + TypeInfo::UnionType { name, members, .. } => members + .iter() + .find(|member| member.name == field) + .cloned() + .ok_or_else(|| unknown_member_error("union", name, field, members))?, + _ => { + return Err(anyhow::anyhow!( + "member '{}' not found on type '{}'", + field, + aggregate_type.type_name() + )); + } + }; + + let mut plan = self.clone(); + plan.location = add_location_offset(base_location, member.offset as i64)?; + plan.type_name = member.member_type.type_name(); + plan.dwarf_type = Some(member.member_type); + plan.type_id = None; + Ok(plan) + } + + fn plan_array_index(&self, dwarf_type: &TypeInfo, index: i64) -> Result { + let (base_location, element_type, stride) = match strip_alias_type(dwarf_type) { + TypeInfo::ArrayType { element_type, .. } => { + let stride = element_type.size().max(1); + (self.location.clone(), element_type.as_ref().clone(), stride) + } + TypeInfo::PointerType { target_type, .. } => { + let stride = target_type.size().max(1); + ( + dereference_location(&self.location)?, + target_type.as_ref().clone(), + stride, + ) + } + ty => { + return Err(PlanError::InvalidArrayAccess { + type_name: ty.type_name(), + } + .into()); + } + }; + + let byte_offset = index.saturating_mul(stride as i64); + let mut plan = self.clone(); + plan.location = add_location_offset(base_location, byte_offset)?; + plan.type_name = element_type.type_name(); + plan.dwarf_type = Some(element_type); + plan.type_id = None; + Ok(plan) + } + + fn plan_pointer_deref(&self, dwarf_type: &TypeInfo) -> Result { + let target_type = match strip_alias_type(dwarf_type) { + TypeInfo::PointerType { target_type, .. } => target_type.as_ref().clone(), + ty => { + return Err(PlanError::InvalidPointerDereference { + type_name: ty.type_name(), + } + .into()); + } + }; + + let mut plan = self.clone(); + plan.location = dereference_location(&self.location)?; + plan.type_name = target_type.type_name(); + plan.dwarf_type = Some(target_type); + plan.type_id = None; + Ok(plan) + } +} + +impl RuntimeCapabilities { + pub fn supports_requirement(&self, requirement: &RuntimeRequirement) -> bool { + match requirement { + RuntimeRequirement::CallerFrame | RuntimeRequirement::DwarfCfiRecovery => { + self.bounded_loops + } + RuntimeRequirement::SleepableUprobe => self.sleepable_uprobe, + RuntimeRequirement::UserMemoryRead => { + self.regular_uprobe || self.sleepable_uprobe || self.copy_from_user_task + } + } + } +} + +trait VariableLocationLoweringExt { + fn lowering_kind(&self) -> VariableLoweringKind; + fn runtime_requirements(&self) -> Vec; + fn required_registers(&self) -> Vec; + fn estimated_stack_bytes(&self) -> usize; +} + +impl VariableLocationLoweringExt for VariableLocation { + fn lowering_kind(&self) -> VariableLoweringKind { + match self { + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::ComputedAddress(_) + | VariableLocation::FrameBaseRelative { .. } => VariableLoweringKind::UserMemoryRead, + VariableLocation::AbsoluteAddressValue(_) + | VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) => VariableLoweringKind::DirectValue, + VariableLocation::Pieces(_) => VariableLoweringKind::Composite, + VariableLocation::OptimizedOut | VariableLocation::Unknown => { + VariableLoweringKind::Unavailable + } + } + } + + fn runtime_requirements(&self) -> Vec { + match self { + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::ComputedAddress(_) => { + let mut requirements = vec![RuntimeRequirement::UserMemoryRead]; + if let VariableLocation::ComputedAddress(steps) = self { + requirements.extend(requirements_for_steps(steps)); + } + requirements + } + VariableLocation::FrameBaseRelative { .. } => vec![ + RuntimeRequirement::DwarfCfiRecovery, + RuntimeRequirement::UserMemoryRead, + ], + VariableLocation::AbsoluteAddressValue(expr) => requirements_for_steps(&expr.steps), + VariableLocation::ComputedValue(steps) => requirements_for_steps(steps), + VariableLocation::Pieces(pieces) => pieces + .iter() + .flat_map(|piece| piece.location.runtime_requirements()) + .collect(), + VariableLocation::RegisterValue { .. } + | VariableLocation::ImplicitValue(_) + | VariableLocation::OptimizedOut + | VariableLocation::Unknown => Vec::new(), + } + } + + fn required_registers(&self) -> Vec { + match self { + VariableLocation::RegisterValue { dwarf_reg } => vec![*dwarf_reg], + VariableLocation::RegisterAddress { dwarf_reg, .. } => vec![*dwarf_reg], + VariableLocation::AbsoluteAddressValue(expr) => registers_for_steps(&expr.steps), + VariableLocation::ComputedValue(steps) | VariableLocation::ComputedAddress(steps) => { + registers_for_steps(steps) + } + VariableLocation::Pieces(pieces) => pieces + .iter() + .flat_map(|piece| piece.location.required_registers()) + .collect(), + VariableLocation::Address(_) + | VariableLocation::FrameBaseRelative { .. } + | VariableLocation::ImplicitValue(_) + | VariableLocation::OptimizedOut + | VariableLocation::Unknown => Vec::new(), + } + } + + fn estimated_stack_bytes(&self) -> usize { + match self { + VariableLocation::AbsoluteAddressValue(expr) => { + estimate_steps_stack_bytes(&expr.steps).max(8) + } + VariableLocation::ComputedValue(steps) | VariableLocation::ComputedAddress(steps) => { + estimate_steps_stack_bytes(steps) + } + VariableLocation::Pieces(pieces) => pieces + .iter() + .map(|piece| piece.location.estimated_stack_bytes()) + .max() + .unwrap_or(0), + VariableLocation::Address(_) + | VariableLocation::RegisterValue { .. } + | VariableLocation::RegisterAddress { .. } + | VariableLocation::FrameBaseRelative { .. } => 8, + VariableLocation::ImplicitValue(bytes) => bytes.len(), + VariableLocation::OptimizedOut | VariableLocation::Unknown => 0, + } + } +} + +fn requirements_for_steps(steps: &[ComputeStep]) -> Vec { + let mut requirements = Vec::new(); + for step in steps { + match step { + ComputeStep::Dereference { .. } => { + requirements.push(RuntimeRequirement::UserMemoryRead) + } + ComputeStep::EntryValueLookup { + caller_pc_steps, + cases, + } => { + requirements.push(RuntimeRequirement::CallerFrame); + requirements.extend(requirements_for_steps(caller_pc_steps)); + for case in cases { + requirements.extend(requirements_for_steps(&case.value_steps)); + } + } + ComputeStep::If { + then_branch, + else_branch, + } => { + requirements.extend(requirements_for_steps(then_branch)); + requirements.extend(requirements_for_steps(else_branch)); + } + _ => {} + } + } + requirements +} + +fn registers_for_steps(steps: &[ComputeStep]) -> Vec { + let mut registers = Vec::new(); + collect_registers_for_steps(steps, &mut registers); + registers +} + +fn collect_registers_for_steps(steps: &[ComputeStep], registers: &mut Vec) { + for step in steps { + match step { + ComputeStep::LoadRegister(register) => registers.push(*register), + ComputeStep::EntryValueLookup { + caller_pc_steps, + cases, + } => { + collect_registers_for_steps(caller_pc_steps, registers); + for case in cases { + collect_registers_for_steps(&case.value_steps, registers); + } + } + ComputeStep::If { + then_branch, + else_branch, + } => { + collect_registers_for_steps(then_branch, registers); + collect_registers_for_steps(else_branch, registers); + } + _ => {} + } + } +} + +fn estimate_steps_stack_bytes(steps: &[ComputeStep]) -> usize { + let nested = steps + .iter() + .map(|step| match step { + ComputeStep::EntryValueLookup { + caller_pc_steps, + cases, + } => cases + .iter() + .map(|case| estimate_steps_stack_bytes(&case.value_steps)) + .chain(std::iter::once(estimate_steps_stack_bytes(caller_pc_steps))) + .max() + .unwrap_or(0), + ComputeStep::If { + then_branch, + else_branch, + } => { + estimate_steps_stack_bytes(then_branch).max(estimate_steps_stack_bytes(else_branch)) + } + _ => 0, + }) + .max() + .unwrap_or(0); + steps.len().saturating_mul(8).max(nested) +} + +fn helper_mode_for_requirements( + requirements: &[RuntimeRequirement], + capabilities: &RuntimeCapabilities, +) -> HelperMode { + if !requirements.contains(&RuntimeRequirement::UserMemoryRead) { + HelperMode::NoUserMemoryRead + } else if capabilities.sleepable_uprobe && capabilities.copy_from_user_task { + HelperMode::CopyFromUserTask + } else { + HelperMode::ProbeReadUser + } +} + +fn verifier_risk_for_requirements( + requirements: &[RuntimeRequirement], + estimated_stack_bytes: usize, + capabilities: &RuntimeCapabilities, +) -> VerifierRisk { + if estimated_stack_bytes > capabilities.max_bpf_stack_bytes { + return VerifierRisk::StackBudgetExceeded { + estimated: estimated_stack_bytes, + max: capabilities.max_bpf_stack_bytes, + }; + } + + if requirements.iter().any(|requirement| { + matches!( + requirement, + RuntimeRequirement::CallerFrame | RuntimeRequirement::DwarfCfiRecovery + ) + }) { + VerifierRisk::RequiresBoundedLoops + } else { + VerifierRisk::Low + } +} + +fn requirement_rank(requirement: &RuntimeRequirement) -> u8 { + match requirement { + RuntimeRequirement::CallerFrame => 0, + RuntimeRequirement::SleepableUprobe => 1, + RuntimeRequirement::UserMemoryRead => 2, + RuntimeRequirement::DwarfCfiRecovery => 3, + } +} + +fn strip_alias_type(ty: &TypeInfo) -> &TypeInfo { + match ty { + TypeInfo::TypedefType { + underlying_type, .. + } + | TypeInfo::QualifiedType { + underlying_type, .. + } => strip_alias_type(underlying_type), + _ => ty, + } +} + +fn unknown_member_error( + kind: &'static str, + type_name: &str, + field: &str, + members: &[crate::StructMember], +) -> anyhow::Error { + let mut member_names = members + .iter() + .map(|member| member.name.clone()) + .collect::>(); + member_names.sort(); + member_names.dedup(); + let list = if member_names.is_empty() { + "".to_string() + } else { + member_names.join(", ") + }; + PlanError::UnknownMember { + kind, + type_name: type_name.to_string(), + field: field.to_string(), + members: list, + } + .into() +} + +fn add_location_offset(location: VariableLocation, offset: i64) -> Result { + match location { + VariableLocation::Address(expr) => { + Ok(VariableLocation::Address(offset_address_expr(expr, offset))) + } + VariableLocation::RegisterAddress { + dwarf_reg, + offset: base, + } => Ok(VariableLocation::RegisterAddress { + dwarf_reg, + offset: base.saturating_add(offset), + }), + VariableLocation::FrameBaseRelative { offset: base } => { + Ok(VariableLocation::FrameBaseRelative { + offset: base.saturating_add(offset), + }) + } + VariableLocation::ComputedAddress(mut steps) => { + push_add_offset(&mut steps, offset); + Ok(VariableLocation::ComputedAddress(steps)) + } + VariableLocation::OptimizedOut => Ok(VariableLocation::OptimizedOut), + VariableLocation::Unknown => Ok(VariableLocation::Unknown), + VariableLocation::AbsoluteAddressValue(_) + | VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) + | VariableLocation::Pieces(_) => { + Err(PlanError::ValueBackedAggregateOffset { offset, location }.into()) + } + } +} + +fn offset_address_expr(mut expr: AddressExpr, offset: i64) -> AddressExpr { + if let [ComputeStep::PushConstant(base)] = expr.steps.as_mut_slice() { + *base = base.saturating_add(offset); + return expr; + } + push_add_offset(&mut expr.steps, offset); + expr +} + +fn push_add_offset(steps: &mut Vec, offset: i64) { + if offset != 0 { + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + } +} + +fn dereference_location(location: &VariableLocation) -> Result { + match location { + VariableLocation::AbsoluteAddressValue(expr) => Ok(VariableLocation::Address(expr.clone())), + VariableLocation::RegisterValue { dwarf_reg } => { + Ok(VariableLocation::ComputedAddress(vec![ + ComputeStep::LoadRegister(*dwarf_reg), + ])) + } + VariableLocation::ComputedValue(steps) => { + Ok(VariableLocation::ComputedAddress(steps.clone())) + } + VariableLocation::ImplicitValue(bytes) => { + let mut address = 0u64; + for (index, byte) in bytes.iter().take(8).enumerate() { + address |= (*byte as u64) << (index * 8); + } + Ok(VariableLocation::Address(AddressExpr::constant(address))) + } + VariableLocation::Address(expr) => { + let mut steps = expr.steps.clone(); + steps.push(ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }); + Ok(VariableLocation::ComputedAddress(steps)) + } + VariableLocation::RegisterAddress { dwarf_reg, offset } => { + let mut steps = vec![ComputeStep::LoadRegister(*dwarf_reg)]; + push_add_offset(&mut steps, *offset); + steps.push(ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }); + Ok(VariableLocation::ComputedAddress(steps)) + } + VariableLocation::ComputedAddress(steps) => { + let mut steps = steps.clone(); + steps.push(ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }); + Ok(VariableLocation::ComputedAddress(steps)) + } + VariableLocation::OptimizedOut => Ok(VariableLocation::OptimizedOut), + VariableLocation::Unknown => Ok(VariableLocation::Unknown), + VariableLocation::FrameBaseRelative { .. } | VariableLocation::Pieces(_) => { + Err(PlanError::UnsupportedDereference { + location: location.clone(), + } + .into()) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VariablePlan { + pub variable_id: VariableId, + pub name: String, + pub ty: TypeId, + pub declaration: DieRef, + pub pc_range: Option, + pub inline_context: Option, + pub location: VariableLocation, + pub availability: Availability, + pub provenance: Provenance, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::{AddressExpr, EntryValueCase, MemoryAccessSize, TargetArch}; + use crate::StructMember; + + fn capabilities(regular_uprobe: bool) -> RuntimeCapabilities { + RuntimeCapabilities { + regular_uprobe, + sleepable_uprobe: false, + uprobe_multi: false, + copy_from_user_task: false, + max_bpf_stack_bytes: 512, + bounded_loops: true, + arch: TargetArch::X86_64, + } + } + + fn read_plan(location: VariableLocation) -> VariableReadPlan { + VariableReadPlan { + name: "value".to_string(), + type_name: "int".to_string(), + dwarf_type: None, + declaration: None, + type_id: None, + location, + availability: Availability::Available, + scope_depth: 0, + is_parameter: false, + is_artificial: false, + pc_range: None, + inline_context: None, + provenance: Provenance::DirectDie, + } + } + + fn typed_read_plan(location: VariableLocation, dwarf_type: TypeInfo) -> VariableReadPlan { + VariableReadPlan { + type_name: dwarf_type.type_name(), + dwarf_type: Some(dwarf_type), + ..read_plan(location) + } + } + + #[test] + fn register_value_lowers_without_runtime_requirements() { + let plan = read_plan(VariableLocation::RegisterValue { dwarf_reg: 0 }); + let lowering = plan.bpf_lowering_plan(&capabilities(false)); + + assert_eq!(lowering.kind, VariableLoweringKind::DirectValue); + assert_eq!(lowering.availability, Availability::Available); + assert!(lowering.requirements.is_empty()); + } + + #[test] + fn memory_location_requires_user_memory_read() { + let plan = read_plan(VariableLocation::Address(AddressExpr::constant(0x1000))); + let lowering = plan.bpf_lowering_plan(&capabilities(false)); + + assert_eq!(lowering.kind, VariableLoweringKind::UserMemoryRead); + assert_eq!( + lowering.availability, + Availability::Requires(RuntimeRequirement::UserMemoryRead) + ); + assert_eq!( + lowering.requirements, + vec![RuntimeRequirement::UserMemoryRead] + ); + } + + #[test] + fn memory_location_is_available_with_regular_uprobe() { + let plan = read_plan(VariableLocation::Address(AddressExpr::constant(0x1000))); + let lowering = plan.bpf_lowering_plan(&capabilities(true)); + + assert_eq!(lowering.kind, VariableLoweringKind::UserMemoryRead); + assert_eq!(lowering.availability, Availability::Available); + assert_eq!(lowering.helper_mode, HelperMode::ProbeReadUser); + assert_eq!(lowering.verifier_risk, VerifierRisk::Low); + assert!(lowering.required_registers.is_empty()); + } + + #[test] + fn absolute_address_value_lowers_without_user_memory_read() { + let plan = read_plan(VariableLocation::AbsoluteAddressValue( + AddressExpr::constant(0x1000), + )); + let lowering = plan.bpf_lowering_plan(&capabilities(false)); + + assert_eq!(lowering.kind, VariableLoweringKind::DirectValue); + assert_eq!(lowering.availability, Availability::Available); + assert!(lowering.requirements.is_empty()); + } + + #[test] + fn memory_location_prefers_copy_from_user_task_when_available() { + let mut capabilities = capabilities(false); + capabilities.sleepable_uprobe = true; + capabilities.copy_from_user_task = true; + let plan = read_plan(VariableLocation::Address(AddressExpr::constant(0x1000))); + let lowering = plan.bpf_lowering_plan(&capabilities); + + assert_eq!(lowering.availability, Availability::Available); + assert_eq!(lowering.helper_mode, HelperMode::CopyFromUserTask); + } + + #[test] + fn register_address_records_required_register() { + let plan = read_plan(VariableLocation::RegisterAddress { + dwarf_reg: 6, + offset: -16, + }); + let lowering = plan.bpf_lowering_plan(&capabilities(true)); + + assert_eq!(lowering.required_registers, vec![6]); + assert_eq!(lowering.estimated_stack_bytes, 8); + } + + #[test] + fn entry_value_steps_surface_caller_frame_and_memory_requirements() { + let plan = read_plan(VariableLocation::ComputedValue(vec![ + ComputeStep::EntryValueLookup { + caller_pc_steps: vec![ + ComputeStep::LoadRegister(7), + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ], + cases: vec![EntryValueCase { + caller_return_pc: 0x10, + value_steps: vec![ComputeStep::LoadRegister(5)], + }], + }, + ])); + let lowering = plan.bpf_lowering_plan(&capabilities(true)); + + assert_eq!(lowering.availability, Availability::Available); + assert_eq!( + lowering.requirements, + vec![ + RuntimeRequirement::CallerFrame, + RuntimeRequirement::UserMemoryRead + ] + ); + assert_eq!(lowering.required_registers, vec![5, 7]); + assert_eq!(lowering.verifier_risk, VerifierRisk::RequiresBoundedLoops); + } + + #[test] + fn stack_budget_excess_reports_unsupported_availability() { + let mut capabilities = capabilities(true); + capabilities.max_bpf_stack_bytes = 16; + let plan = read_plan(VariableLocation::ComputedValue(vec![ + ComputeStep::PushConstant(1); + 8 + ])); + let lowering = plan.bpf_lowering_plan(&capabilities); + + assert!(matches!( + lowering.availability, + Availability::Unsupported(UnsupportedReason::ExpressionShape { .. }) + )); + assert_eq!( + lowering.verifier_risk, + VerifierRisk::StackBudgetExceeded { + estimated: 64, + max: 16, + } + ); + } + + #[test] + fn field_access_adds_member_offset_and_type() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::RegisterAddress { + dwarf_reg: 6, + offset: -32, + }, + TypeInfo::StructType { + name: "Request".to_string(), + size: 16, + members: vec![StructMember { + name: "fd".to_string(), + member_type: int_type.clone(), + offset: 12, + bit_offset: None, + bit_size: None, + }], + }, + ); + + let access = VariableAccessPath::fields(["fd"]); + let planned = plan.plan_access_path(&access).expect("field access"); + + assert_eq!(planned.name, "value.fd"); + assert_eq!(planned.dwarf_type, Some(int_type)); + assert_eq!( + planned.location, + VariableLocation::RegisterAddress { + dwarf_reg: 6, + offset: -20, + } + ); + } + + #[test] + fn field_access_unknown_member_reports_known_members() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::Address(AddressExpr::constant(0x1000)), + TypeInfo::StructType { + name: "Request".to_string(), + size: 8, + members: vec![ + StructMember { + name: "fd".to_string(), + member_type: int_type.clone(), + offset: 0, + bit_offset: None, + bit_size: None, + }, + StructMember { + name: "flags".to_string(), + member_type: int_type, + offset: 4, + bit_offset: None, + bit_size: None, + }, + ], + }, + ); + + let err = plan + .plan_access_path(&VariableAccessPath::fields(["missing"])) + .expect_err("unknown member should fail"); + + assert_eq!( + err.to_string(), + "Unknown member 'missing' in struct 'Request' (known members: fd, flags)" + ); + } + + #[test] + fn field_access_folds_constant_address_offsets() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::Address(AddressExpr::constant(0x1000)), + TypeInfo::StructType { + name: "Request".to_string(), + size: 16, + members: vec![StructMember { + name: "fd".to_string(), + member_type: int_type, + offset: 12, + bit_offset: None, + bit_size: None, + }], + }, + ); + + let planned = plan + .plan_access_path(&VariableAccessPath::fields(["fd"])) + .expect("field access"); + + assert_eq!( + planned.location, + VariableLocation::Address(AddressExpr::constant(0x100c)) + ); + } + + #[test] + fn field_access_rejects_value_backed_aggregates() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let struct_type = TypeInfo::StructType { + name: "Pair".to_string(), + size: 8, + members: vec![StructMember { + name: "b".to_string(), + member_type: int_type, + offset: 4, + bit_offset: None, + bit_size: None, + }], + }; + let access = VariableAccessPath::fields(["b"]); + + for location in [ + VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x1000)), + VariableLocation::RegisterValue { dwarf_reg: 0 }, + VariableLocation::ComputedValue(vec![ComputeStep::LoadRegister(0)]), + ] { + let plan = typed_read_plan(location, struct_type.clone()); + let err = plan + .plan_access_path(&access) + .expect_err("value-backed aggregate field access should fail"); + assert!( + err.downcast_ref::() + .is_some_and(PlanError::is_value_backed_aggregate_access), + "unexpected error: {err}" + ); + } + } + + #[test] + fn array_index_rejects_value_backed_aggregates() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let array_type = TypeInfo::ArrayType { + element_type: Box::new(int_type), + element_count: Some(2), + total_size: Some(8), + }; + let access = VariableAccessPath::new(vec![VariableAccessSegment::ArrayIndex(1)]); + + for location in [ + VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x1000)), + VariableLocation::RegisterValue { dwarf_reg: 0 }, + VariableLocation::ComputedValue(vec![ComputeStep::LoadRegister(0)]), + ] { + let plan = typed_read_plan(location, array_type.clone()); + let err = plan + .plan_access_path(&access) + .expect_err("value-backed aggregate array access should fail"); + assert!( + err.downcast_ref::() + .is_some_and(PlanError::is_value_backed_aggregate_access), + "unexpected error: {err}" + ); + } + } + + #[test] + fn pointer_field_access_dereferences_then_offsets() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let struct_type = TypeInfo::StructType { + name: "Node".to_string(), + size: 16, + members: vec![StructMember { + name: "value".to_string(), + member_type: int_type, + offset: 8, + bit_offset: None, + bit_size: None, + }], + }; + let plan = typed_read_plan( + VariableLocation::RegisterValue { dwarf_reg: 5 }, + TypeInfo::PointerType { + target_type: Box::new(struct_type), + size: 8, + }, + ); + + let access = VariableAccessPath::fields(["value"]); + let planned = plan.plan_access_path(&access).expect("pointer field"); + + assert_eq!( + planned.location, + VariableLocation::ComputedAddress(vec![ + ComputeStep::LoadRegister(5), + ComputeStep::PushConstant(8), + ComputeStep::Add, + ]) + ); + } + + #[test] + fn pointer_field_access_from_absolute_address_value_rebases_memory_location() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let struct_type = TypeInfo::StructType { + name: "Node".to_string(), + size: 16, + members: vec![StructMember { + name: "value".to_string(), + member_type: int_type, + offset: 8, + bit_offset: None, + bit_size: None, + }], + }; + let plan = typed_read_plan( + VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x1000)), + TypeInfo::PointerType { + target_type: Box::new(struct_type), + size: 8, + }, + ); + + let planned = plan + .plan_access_path(&VariableAccessPath::fields(["value"])) + .expect("pointer field"); + + assert_eq!( + planned.location, + VariableLocation::Address(AddressExpr::constant(0x1008)) + ); + } + + #[test] + fn pointer_field_access_from_computed_value_uses_value_as_address() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let struct_type = TypeInfo::StructType { + name: "Node".to_string(), + size: 16, + members: vec![StructMember { + name: "value".to_string(), + member_type: int_type, + offset: 8, + bit_offset: None, + bit_size: None, + }], + }; + let plan = typed_read_plan( + VariableLocation::ComputedValue(vec![ComputeStep::PushConstant(0x2000)]), + TypeInfo::PointerType { + target_type: Box::new(struct_type), + size: 8, + }, + ); + + let planned = plan + .plan_access_path(&VariableAccessPath::fields(["value"])) + .expect("pointer field"); + + assert_eq!( + planned.location, + VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x2000), + ComputeStep::PushConstant(8), + ComputeStep::Add, + ]) + ); + } + + #[test] + fn array_index_access_uses_element_stride() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::Address(AddressExpr::constant(0x1000)), + TypeInfo::ArrayType { + element_type: Box::new(int_type), + element_count: Some(8), + total_size: Some(32), + }, + ); + + let access = VariableAccessPath::new(vec![VariableAccessSegment::ArrayIndex(3)]); + let planned = plan.plan_access_path(&access).expect("array index"); + + assert_eq!(planned.name, "value[3]"); + assert_eq!( + planned.location, + VariableLocation::Address(AddressExpr::constant(0x100c)) + ); + } +} diff --git a/ghostscope/src/cli/script_output.rs b/ghostscope/src/cli/script_output.rs index 62b6722a..e1be62e8 100644 --- a/ghostscope/src/cli/script_output.rs +++ b/ghostscope/src/cli/script_output.rs @@ -202,6 +202,27 @@ mod tests { } } + fn sample_expr_error_event() -> ParsedTraceEvent { + ParsedTraceEvent { + trace_id: 8, + timestamp: 2_000_000_000, + pid: 5001, + tid: 5002, + instructions: vec![ + ParsedInstruction::ExprError { + expr: "memcmp(buf, hex(\"41\"), 1)".to_string(), + error_code: 2, + flags: 0x01, + failing_addr: 0x1234, + }, + ParsedInstruction::EndInstruction { + total_instructions: 1, + execution_status: 1, + }, + ], + } + } + fn render_with_renderer(event: &ParsedTraceEvent, options: ScriptOutputOptions) -> Vec { let mut renderer = ScriptOutputRenderer::new(options); renderer.render_event_lines(event) @@ -242,6 +263,47 @@ mod tests { assert_eq!(lines, vec!["hello".to_string(), "value = 42".to_string()]); } + #[test] + fn plain_output_preserves_runtime_expr_errors() { + let lines = render_with_renderer( + &sample_expr_error_event(), + ScriptOutputOptions { + mode: ScriptOutputMode::Plain, + timestamp: ScriptTimestampFormat::None, + color_enabled: false, + }, + ); + + assert_eq!( + lines, + vec![ + "ExprError: memcmp(buf, hex(\"41\"), 1) (read error at 0x0000000000001234, flags: first-arg read-fail)" + .to_string() + ] + ); + } + + #[test] + fn pretty_output_preserves_runtime_expr_errors_with_metadata() { + let lines = render_with_renderer( + &sample_expr_error_event(), + ScriptOutputOptions { + mode: ScriptOutputMode::Pretty, + timestamp: ScriptTimestampFormat::None, + color_enabled: false, + }, + ); + + assert_eq!( + lines, + vec![ + "TraceID:8 PID:5001 TID:5002".to_string(), + " ExprError: memcmp(buf, hex(\"41\"), 1) (read error at 0x0000000000001234, flags: first-arg read-fail)" + .to_string(), + ] + ); + } + #[test] fn pretty_output_can_omit_timestamp() { let lines = render_with_renderer( diff --git a/ghostscope/src/cli/script_runtime.rs b/ghostscope/src/cli/script_runtime.rs index c8e4b8bd..e8985a98 100644 --- a/ghostscope/src/cli/script_runtime.rs +++ b/ghostscope/src/cli/script_runtime.rs @@ -413,6 +413,11 @@ mod tests { config_file_path: None, }, runtime: RuntimeContext::default(), + kernel_capabilities: ghostscope_loader::KernelCapabilities { + supports_ringbuf: true, + supports_perf_event_array: true, + supports_ns_current_pid_tgid_helper: false, + }, } } diff --git a/ghostscope/src/config/runtime.rs b/ghostscope/src/config/runtime.rs index a1405d09..c43a969d 100644 --- a/ghostscope/src/config/runtime.rs +++ b/ghostscope/src/config/runtime.rs @@ -165,12 +165,17 @@ impl RuntimeContext { pub struct ResolvedConfig { pub user: UserConfig, pub runtime: RuntimeContext, + pub kernel_capabilities: KernelCapabilities, } impl ResolvedConfig { pub fn resolve(user: UserConfig, kernel_caps: &KernelCapabilities) -> Result { let runtime = RuntimeContext::resolve(&user, kernel_caps)?; - Ok(Self { user, runtime }) + Ok(Self { + user, + runtime, + kernel_capabilities: *kernel_caps, + }) } pub fn get_ui_config(&self) -> ghostscope_ui::UiConfig { @@ -209,7 +214,7 @@ impl ResolvedConfig { "⚠️ TESTING MODE: force_perf_event_array=true in config - using PerfEventArray" ); ghostscope_compiler::EventMapType::PerfEventArray - } else if KernelCapabilities::ringbuf_supported() { + } else if self.kernel_capabilities.supports_ringbuf { ghostscope_compiler::EventMapType::RingBuf } else { ghostscope_compiler::EventMapType::PerfEventArray @@ -260,10 +265,20 @@ impl ResolvedConfig { special_pid_ns: self.runtime.special_pid_ns, proc_offsets_pid_ns: self.runtime.proc_offsets_pid_ns, input_pid: self.input_pid, + runtime_capabilities: dwarf_runtime_capabilities_from_kernel(&self.kernel_capabilities), } } } +fn dwarf_runtime_capabilities_from_kernel( + kernel_caps: &KernelCapabilities, +) -> ghostscope_compiler::RuntimeCapabilities { + ghostscope_compiler::RuntimeCapabilities { + regular_uprobe: kernel_caps.supports_ringbuf || kernel_caps.supports_perf_event_array, + ..Default::default() + } +} + impl Deref for ResolvedConfig { type Target = UserConfig; diff --git a/ghostscope/src/core/session.rs b/ghostscope/src/core/session.rs index e2457930..e19e6f19 100644 --- a/ghostscope/src/core/session.rs +++ b/ghostscope/src/core/session.rs @@ -416,6 +416,11 @@ mod tests { let resolved_config = ResolvedConfig { user: user_config, runtime: RuntimeContext::default(), + kernel_capabilities: ghostscope_loader::KernelCapabilities { + supports_ringbuf: true, + supports_perf_event_array: true, + supports_ns_current_pid_tgid_helper: false, + }, }; // Create session with config - should automatically set resolver diff --git a/ghostscope/src/script/compiler.rs b/ghostscope/src/script/compiler.rs index 1a3b5ce7..e0b319fe 100644 --- a/ghostscope/src/script/compiler.rs +++ b/ghostscope/src/script/compiler.rs @@ -5,12 +5,7 @@ use ghostscope_ui::events::{ExecutionStatus, ScriptCompilationDetails, ScriptExe use tracing::{error, info, warn}; fn map_compile_error_message(e: &ghostscope_compiler::CompileError) -> String { - match e { - ghostscope_compiler::CompileError::CodeGen( - ghostscope_compiler::ebpf::context::CodeGenError::VariableNotInScope(name), - ) => format!("Use of variable '{name}' outside of its scope"), - _ => e.to_string(), - } + e.user_message().into_owned() } fn pid_alias_runtime_pid( diff --git a/ghostscope/src/tui/info_handlers.rs b/ghostscope/src/tui/info_handlers.rs index 07b5e4bc..cdc41c8a 100644 --- a/ghostscope/src/tui/info_handlers.rs +++ b/ghostscope/src/tui/info_handlers.rs @@ -567,7 +567,7 @@ fn unique_module_count(addresses: &[AddressQueryResult]) -> usize { } fn variable_debug_info_from_query( - variable: ghostscope_dwarf::VariableWithEvaluation, + variable: ghostscope_dwarf::VisibleVariable, ) -> VariableDebugInfo { let type_pretty = variable.dwarf_type.as_ref().map(|t| t.to_string()); let size = variable.dwarf_type.as_ref().map(|t| t.size()); @@ -576,7 +576,7 @@ fn variable_debug_info_from_query( name: variable.name, type_name: variable.type_name, type_pretty, - location_description: format!("{}", variable.evaluation_result), + location_description: format!("{}", variable.location), size, scope_start: None, scope_end: None,