diff --git a/bins/dwarf-tool/src/main.rs b/bins/dwarf-tool/src/main.rs index 15cdf19d..e48c1040 100644 --- a/bins/dwarf-tool/src/main.rs +++ b/bins/dwarf-tool/src/main.rs @@ -165,7 +165,7 @@ enum Commands { } #[derive(Debug, serde::Serialize)] -struct VariableInfo { +struct JsonVariableInfo { name: String, type_name: String, location: String, @@ -180,7 +180,7 @@ struct AddressInfo { source_file: Option, source_line: Option, source_column: Option, - variables: Vec, + variables: Vec, } #[derive(Debug, serde::Serialize)] @@ -641,8 +641,8 @@ fn total_variables_in_query_results(addresses: &[AddressQueryResult]) -> usize { addresses.iter().map(query_address_variable_count).sum() } -fn variable_info_from_query(variable: &ghostscope_dwarf::VisibleVariable) -> VariableInfo { - VariableInfo { +fn variable_info_from_query(variable: &ghostscope_dwarf::VisibleVariable) -> JsonVariableInfo { + JsonVariableInfo { name: variable.name.clone(), type_name: variable.type_name.clone(), location: format!("{}", variable.location), @@ -1090,7 +1090,8 @@ async fn run_source_line_benchmark( let mut run_total_variables = 0usize; for module_address in &addresses { - run_total_variables += analyzer.get_all_variables_at_address(module_address)?.len(); + let pc_context = analyzer.resolve_pc(module_address)?; + run_total_variables += analyzer.visible_variables(&pc_context)?.len(); } query_times.push(start.elapsed()); diff --git a/e2e-tests/tests/member_pointer_compilation.rs b/e2e-tests/tests/member_pointer_compilation.rs index 951ad92b..bb86964d 100644 --- a/e2e-tests/tests/member_pointer_compilation.rs +++ b/e2e-tests/tests/member_pointer_compilation.rs @@ -4,6 +4,10 @@ use common::{init, OptimizationLevel, FIXTURES}; const TRACE_LINE: u32 = 68; +fn field_path(fields: &[&str]) -> ghostscope_dwarf::VariableAccessPath { + ghostscope_dwarf::VariableAccessPath::fields(fields.iter().map(|field| (*field).to_string())) +} + async fn compile_member_pointer_script( script: &str, opt_level: OptimizationLevel, @@ -66,41 +70,34 @@ async fn test_member_pointer_planner_resolves_o2_chain_accesses() -> anyhow::Res ); for module_address in &addrs { + let pc_context = analyzer.resolve_pc(module_address)?; let key_data = analyzer - .plan_chain_access_read_plan( - module_address, - "h", - &["key".to_string(), "data".to_string()], - ) + .plan_variable_access_by_name(&pc_context, "h", &field_path(&["key", "data"])) .map_err(|e| { anyhow::anyhow!( - "plan_chain_access_read_plan failed for h.key.data at 0x{:x}: {}", + "plan_variable_access_by_name failed for h.key.data at 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( key_data.is_some(), - "plan_chain_access_read_plan returned None for h.key.data at 0x{:x}", + "plan_variable_access_by_name returned None for h.key.data at 0x{:x}", module_address.address ); let header_pos = analyzer - .plan_chain_access_read_plan( - module_address, - "r", - &["header_in".to_string(), "pos".to_string()], - ) + .plan_variable_access_by_name(&pc_context, "r", &field_path(&["header_in", "pos"])) .map_err(|e| { anyhow::anyhow!( - "plan_chain_access_read_plan failed for r.header_in.pos at 0x{:x}: {}", + "plan_variable_access_by_name failed for r.header_in.pos at 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( header_pos.is_some(), - "plan_chain_access_read_plan returned None for r.header_in.pos at 0x{:x}", + "plan_variable_access_by_name returned None for r.header_in.pos at 0x{:x}", module_address.address ); } @@ -531,11 +528,12 @@ async fn test_complex_bitfield_chain_planner_resolves_member_offsets() -> anyhow ); for module_address in &addrs { + let pc_context = analyzer.resolve_pc(module_address)?; let active = analyzer - .plan_chain_access_read_plan(module_address, "c", &["active".to_string()])? + .plan_variable_access_by_name(&pc_context, "c", &field_path(&["active"]))? .ok_or_else(|| anyhow::anyhow!("missing plan for c.active at {:?}", module_address))?; let flags = analyzer - .plan_chain_access_read_plan(module_address, "c", &["flags".to_string()])? + .plan_variable_access_by_name(&pc_context, "c", &field_path(&["flags"]))? .ok_or_else(|| anyhow::anyhow!("missing plan for c.flags at {:?}", module_address))?; let expected_steps = vec![ diff --git a/e2e-tests/tests/optimized_inline_execution.rs b/e2e-tests/tests/optimized_inline_execution.rs index a9ca8c3b..fca3f4eb 100644 --- a/e2e-tests/tests/optimized_inline_execution.rs +++ b/e2e-tests/tests/optimized_inline_execution.rs @@ -170,18 +170,20 @@ async fn test_optimized_inline_struct_member_access_resolves_inline_parameter_na "No DWARF addresses found for inline_callsite_program.c:{INLINE_STATE_TRACE_LINE}" ); for module_address in &addrs { + let pc_context = analyzer.resolve_pc(module_address)?; + let access_path = ghostscope_dwarf::VariableAccessPath::fields(["total_bytes".to_string()]); let planned = analyzer - .plan_chain_access_read_plan(module_address, "state", &["total_bytes".to_string()]) + .plan_variable_access_by_name(&pc_context, "state", &access_path) .map_err(|e| { anyhow::anyhow!( - "exec-path plan_chain_access_read_plan failed for 0x{:x}: {}", + "exec-path plan_variable_access_by_name failed for 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( planned.is_some(), - "exec-path plan_chain_access_read_plan returned None for 0x{:x}", + "exec-path plan_variable_access_by_name returned None for 0x{:x}", module_address.address ); } @@ -198,18 +200,20 @@ async fn test_optimized_inline_struct_member_access_resolves_inline_parameter_na "No PID-backed DWARF addresses found for inline_callsite_program.c:{INLINE_STATE_TRACE_LINE}" ); for module_address in &pid_addrs { + let pc_context = pid_analyzer.resolve_pc(module_address)?; + let access_path = ghostscope_dwarf::VariableAccessPath::fields(["total_bytes".to_string()]); let planned = pid_analyzer - .plan_chain_access_read_plan(module_address, "state", &["total_bytes".to_string()]) + .plan_variable_access_by_name(&pc_context, "state", &access_path) .map_err(|e| { anyhow::anyhow!( - "pid-backed plan_chain_access_read_plan failed for 0x{:x}: {}", + "pid-backed plan_variable_access_by_name failed for 0x{:x}: {}", module_address.address, e ) })?; anyhow::ensure!( planned.is_some(), - "pid-backed plan_chain_access_read_plan returned None for 0x{:x}", + "pid-backed plan_variable_access_by_name returned None for 0x{:x}", module_address.address ); } diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index b8b155b0..75057b69 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -533,17 +533,38 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Try DWARF resolution for the pointer side if let Some(var) = self.query_dwarf_for_complex_expr(ptr_side)? { if var.dwarf_type.is_some() { - // Determine pointed-to/element type and compute location with scaled offset let index = sign * int_side; - let (location, elem_ty) = - self.compute_pointed_location_with_index(ptr_side, index)?; - let address = ghostscope_dwarf::PlannedAddress::from_location(location) - .ok_or_else(|| { - CodeGenError::DwarfError( + let pointed_plan = var + .plan_pointer_element_index(index) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = + self.variable_read_plan_to_materialization(pointed_plan, pc_address)?; + let elem_ty = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let address = match materialized.materialization { + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { + address, + } => address, + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability, + } => { + return Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + &availability, + pc_address, + )) + } + _ => { + return Err(CodeGenError::DwarfError( "pointer arithmetic did not produce an address-backed plan" .to_string(), - ) - })?; + )) + } + }; let data_len = Self::compute_read_size_for_type(&elem_ty); let module_hint = self.take_module_hint(); if data_len == 0 { diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index 6991bdc0..ce54ad6d 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -5,7 +5,6 @@ use super::context::{CodeGenError, EbpfContext, Result}; use ghostscope_dwarf::{ - semantics::{add_location_offset, dereference_location}, AddressOrigin, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, PlannedAddress, PlannedAddressKind, SectionType, TypeInfo, VariableAccessPath, VariableAccessSegment, VariableLocation, VariableMaterializationPlan, VariableReadPlan, @@ -1227,7 +1226,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } if let Some((global_module, plan)) = analyzer - .plan_global_chain_access_read_plan(&prefer_module, var_name, &[]) + .plan_global_access_read_plan(&prefer_module, var_name, &VariableAccessPath::default()) .map_err(|err| CodeGenError::DwarfError(err.to_string()))? { debug!("Found DWARF global '{}' via variable read plan", var_name); @@ -1382,74 +1381,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { }; Ok(Some((base, VariableAccessPath::new(segments)))) } - - /// Compute a typed pointed-to location for expressions like `ptr +/- K` where K is an element index. - /// Returns a computed location along with the pointed-to DWARF type. - /// The offset is scaled by the element size of the pointer/array target type. - pub fn compute_pointed_location_with_index( - &mut self, - ptr_expr: &crate::script::Expr, - index: i64, - ) -> Result<(VariableLocation, TypeInfo)> { - use ghostscope_dwarf::TypeInfo; - - // Resolve the pointer expression via DWARF - let ptr_var = self - .query_dwarf_for_complex_expr(ptr_expr)? - .ok_or_else(|| CodeGenError::VariableNotFound(format!("{ptr_expr:?}")))?; - - let ptr_ty = ptr_var.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })?; - - // Unwrap typedef/qualified wrappers - let mut ty = ptr_ty; - loop { - match ty { - TypeInfo::TypedefType { - underlying_type, .. - } => ty = underlying_type.as_ref(), - TypeInfo::QualifiedType { - underlying_type, .. - } => ty = underlying_type.as_ref(), - _ => break, - } - } - - // Extract pointed-to (element) type and element size - let (elem_ty, elem_size) = match ty { - TypeInfo::PointerType { target_type, .. } => { - let et = target_type.as_ref().clone(); - let es = et.size(); - let es = if es == 0 { 1 } else { es }; - (et, es) - } - TypeInfo::ArrayType { element_type, .. } => { - let et = element_type.as_ref().clone(); - let es = et.size(); - let es = if es == 0 { 1 } else { es }; - (et, es) - } - TypeInfo::FunctionType { .. } => { - return Err(CodeGenError::TypeError( - "Pointer arithmetic is not supported on function pointers".to_string(), - )) - } - _ => { - return Err(CodeGenError::TypeError( - "Pointer arithmetic requires a pointer or array expression".to_string(), - )) - } - }; - - let base_location = dereference_location(&ptr_var.location) - .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; - let byte_offset = index.saturating_mul(elem_size as i64); - let location = add_location_offset(base_location, byte_offset) - .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; - - Ok((location, elem_ty)) - } } #[cfg(test)] @@ -1673,7 +1604,7 @@ mod tests { "ptr", "int*", Some(ptr_ty), - location.clone(), + location, Availability::Available, ); @@ -1681,13 +1612,6 @@ mod tests { .variable_read_plan_to_llvm_value(&plan, 0, None) .expect("absolute address value should lower"); assert!(matches!(value, BasicValueEnum::IntValue(_))); - - let pointee = dereference_location(&location) - .expect("absolute address value should dereference to memory"); - assert_eq!( - pointee, - VariableLocation::Address(AddressExpr::constant(0x2000)) - ); } #[test] diff --git a/ghostscope-dwarf/src/analyzer/mod.rs b/ghostscope-dwarf/src/analyzer/mod.rs index 9d51678c..73d8ef01 100644 --- a/ghostscope-dwarf/src/analyzer/mod.rs +++ b/ghostscope-dwarf/src/analyzer/mod.rs @@ -103,7 +103,7 @@ impl DwarfAnalyzer { let mut variables = Vec::new(); let mut parameters = Vec::new(); - for variable in self.get_all_variables_at_address(module_address)? { + for variable in self.visible_variables_at_address(module_address)? { if variable.is_parameter { parameters.push(variable); } else { diff --git a/ghostscope-dwarf/src/analyzer/plan_global.rs b/ghostscope-dwarf/src/analyzer/plan_global.rs index a76f3d02..13e6813c 100644 --- a/ghostscope-dwarf/src/analyzer/plan_global.rs +++ b/ghostscope-dwarf/src/analyzer/plan_global.rs @@ -74,20 +74,6 @@ impl DwarfAnalyzer { results } - /// Plan a global/static member chain as a neutral read plan. - pub fn plan_global_chain_access_read_plan( - &self, - prefer_module: &PathBuf, - base: &str, - fields: &[String], - ) -> Result> { - self.plan_global_access_read_plan( - prefer_module, - base, - &VariableAccessPath::fields(fields.iter().cloned()), - ) - } - /// Plan a global/static source-level access path as a neutral read plan. pub fn plan_global_access_read_plan( &self, @@ -162,16 +148,6 @@ impl DwarfAnalyzer { die_off: gimli::UnitOffset, provenance: Provenance, ) -> Result { - let variable = self.resolve_variable_by_offsets_in_module(module_path, cu_off, die_off)?; - Ok(Self::read_plan_from_variable(variable, provenance)) - } - - fn resolve_variable_by_offsets_in_module>( - &self, - module_path: P, - cu_off: gimli::DebugInfoOffset, - die_off: gimli::UnitOffset, - ) -> Result { let path_buf = module_path.as_ref().to_path_buf(); if let Some(module_data) = self.modules.get(&path_buf) { let items = vec![(cu_off, die_off)]; @@ -190,7 +166,7 @@ impl DwarfAnalyzer { var.dwarf_type = Some(ti); } } - Ok(var) + Ok(Self::read_plan_from_variable(var, provenance)) } else { Err(anyhow::anyhow!( "Module {} not loaded", diff --git a/ghostscope-dwarf/src/analyzer/plan_pc.rs b/ghostscope-dwarf/src/analyzer/plan_pc.rs index ba5517d6..ddcd27df 100644 --- a/ghostscope-dwarf/src/analyzer/plan_pc.rs +++ b/ghostscope-dwarf/src/analyzer/plan_pc.rs @@ -356,11 +356,11 @@ impl DwarfAnalyzer { .map(Some) } - /// Get all variables visible at the given module address as semantic views. + /// Return variables visible at a module address as semantic views. /// /// # Arguments /// * `module_address` - Module address containing both module path and address offset - pub fn get_all_variables_at_address( + pub(super) fn visible_variables_at_address( &self, module_address: &ModuleAddress, ) -> Result> { @@ -372,16 +372,4 @@ impl DwarfAnalyzer { let ctx = self.resolve_pc(module_address)?; self.visible_variables(&ctx) } - - /// Plan a chain access (e.g., r.headers_in) as a neutral read plan. - pub fn plan_chain_access_read_plan( - &self, - module_address: &ModuleAddress, - base_var: &str, - chain: &[String], - ) -> Result> { - let ctx = self.resolve_pc(module_address)?; - let path = VariableAccessPath::fields(chain.iter().cloned()); - self.plan_variable_access_by_name(&ctx, base_var, &path) - } } diff --git a/ghostscope-dwarf/src/core/types.rs b/ghostscope-dwarf/src/core/types.rs index 13df1b65..c47f0d2e 100644 --- a/ghostscope-dwarf/src/core/types.rs +++ b/ghostscope-dwarf/src/core/types.rs @@ -35,16 +35,6 @@ pub struct SourceLocation { pub address: u64, } -/// Variable information result -#[derive(Debug, Clone)] -pub struct VariableInfo { - pub name: String, - pub type_name: String, - pub location: Option, - pub scope_start: Option, - pub scope_end: Option, -} - /// Function information #[derive(Debug, Clone)] pub struct FunctionInfo { diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index 429e5adf..9bc66de6 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -30,7 +30,7 @@ pub use core::{ DieRef, DwarfError, EntryValueCase, FunctionId, FunctionInfo, GlobalVariableInfo, HelperMode, InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, PieceLocation, Provenance, Result, RuntimeCapabilities, RuntimeRequirement, ScopeId, SectionType, SourceLocation, TargetArch, - TypeId, UnsupportedReason, VariableId, VariableInfo, VariableLocation, VerifierRisk, + TypeId, UnsupportedReason, VariableId, VariableLocation, VerifierRisk, }; // Re-export semantic contract types. diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs index 806a66f7..c28bca8b 100644 --- a/ghostscope-dwarf/src/semantics/variable_plan.rs +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -202,6 +202,9 @@ pub enum PlanError { #[error("array access requires array or pointer type, got '{type_name}'")] InvalidArrayAccess { type_name: String }, + #[error("Pointer arithmetic requires a pointer or array expression, got '{type_name}'")] + InvalidPointerArithmetic { type_name: String }, + #[error("pointer dereference requires pointer type, got '{type_name}'")] InvalidPointerDereference { type_name: String }, @@ -223,6 +226,12 @@ impl PlanError { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ElementIndexContext { + AccessPath, + PointerArithmetic, +} + impl VariableReadPlan { pub fn from_visible_variable(variable: VisibleVariable, provenance: Provenance) -> Self { Self { @@ -379,6 +388,26 @@ impl VariableReadPlan { Ok(plan) } + /// Plan pointer-style element access for expressions like `ptr +/- K`. + /// + /// This keeps pointer dereference and element-size scaling in the DWARF + /// semantic layer instead of making compiler lowering rewrite locations. + pub fn plan_pointer_element_index(&self, index: i64) -> Result { + let dwarf_type = self + .dwarf_type + .clone() + .ok_or_else(|| PlanError::MissingTypeInfo { + name: self.name.clone(), + })?; + let mut plan = + self.plan_element_index(&dwarf_type, index, ElementIndexContext::PointerArithmetic)?; + let segment = VariableAccessSegment::ArrayIndex(index); + plan.access_path.segments.push(segment.clone()); + plan.name + .push_str(&VariableAccessPath::new(vec![segment]).suffix()); + Ok(plan) + } + fn plan_access_segment(&self, segment: &VariableAccessSegment) -> Result { let dwarf_type = self .dwarf_type @@ -432,6 +461,15 @@ impl VariableReadPlan { } fn plan_array_index(&self, dwarf_type: &TypeInfo, index: i64) -> Result { + self.plan_element_index(dwarf_type, index, ElementIndexContext::AccessPath) + } + + fn plan_element_index( + &self, + dwarf_type: &TypeInfo, + index: i64, + context: ElementIndexContext, + ) -> Result { let (base_location, element_type, stride) = match strip_alias_type(dwarf_type) { TypeInfo::ArrayType { element_type, .. } => { let stride = element_type.size().max(1); @@ -446,8 +484,12 @@ impl VariableReadPlan { ) } ty => { - return Err(PlanError::InvalidArrayAccess { - type_name: ty.type_name(), + let type_name = ty.type_name(); + return Err(match context { + ElementIndexContext::AccessPath => PlanError::InvalidArrayAccess { type_name }, + ElementIndexContext::PointerArithmetic => { + PlanError::InvalidPointerArithmetic { type_name } + } } .into()); } @@ -1740,6 +1782,71 @@ mod tests { ); } + #[test] + fn pointer_element_index_is_planned_in_dwarf_semantics() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::RegisterValue { dwarf_reg: 5 }, + TypeInfo::PointerType { + target_type: Box::new(int_type), + size: 8, + }, + ); + + let planned = plan + .plan_pointer_element_index(3) + .expect("pointer element index"); + + assert_eq!(planned.name, "value[3]"); + assert_eq!( + planned.location, + VariableLocation::ComputedAddress(vec![ + ComputeStep::LoadRegister(5), + ComputeStep::PushConstant(12), + ComputeStep::Add, + ]) + ); + } + + #[test] + fn pointer_element_index_rejects_aggregate_arithmetic_with_pointer_error() { + let int_type = TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: gimli::constants::DW_ATE_signed.0 as u16, + }; + let plan = typed_read_plan( + VariableLocation::Address(AddressExpr::constant(0x1000)), + TypeInfo::StructType { + name: "GlobalState".to_string(), + size: 16, + members: vec![StructMember { + name: "counter".to_string(), + member_type: int_type, + offset: 0, + bit_offset: None, + bit_size: None, + }], + }, + ); + + let err = plan + .plan_pointer_element_index(1) + .expect_err("struct arithmetic must be rejected"); + let plan_error = err + .downcast_ref::() + .expect("structured plan error"); + assert!(matches!( + plan_error, + PlanError::InvalidPointerArithmetic { type_name } + if type_name == "struct GlobalState" + )); + } + #[test] fn array_index_access_uses_element_stride() { let int_type = TypeInfo::BaseType {