From 64f7e3aadb9508d62d07aa1545b6c77588be93b5 Mon Sep 17 00:00:00 2001 From: swananan Date: Mon, 4 May 2026 21:33:11 +0800 Subject: [PATCH] refactor: narrow dwarf planned addresses Introduce PlannedAddressKind so materialized user-memory reads expose a small address primitive instead of the full VariableLocation enum. The kind records constant, register-offset, frame-base-relative, and computed addresses while AddressOrigin continues to carry rebasing semantics. Lower planned addresses in the compiler by matching PlannedAddressKind directly, then route read-plan value and lvalue address lowering through plan-aware helpers. Address-only lowering derives the planned address without requiring DWARF type metadata. Remove the legacy VariableLocation lowering helpers from compiler tests so this boundary is exercised through read plans instead of compatibility APIs. This keeps complex DWARF expressions behind the computed address escape hatch without letting eBPF lowering inspect source-level VariableLocation shapes. Refs #148. --- ghostscope-compiler/src/ebpf/codegen.rs | 30 +- ghostscope-compiler/src/ebpf/dwarf_bridge.rs | 437 ++++++++---------- ghostscope-compiler/src/ebpf/expression.rs | 110 +++-- ghostscope-dwarf/src/lib.rs | 10 +- .../src/semantics/variable_plan.rs | 91 +++- 5 files changed, 350 insertions(+), 328 deletions(-) diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index a4722d13..b8b155b0 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -1867,8 +1867,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { CodeGenError::VariableNotFound(format!("{expr:?}")) })?; let mod_hint = self.take_module_hint(); - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, None, mod_hint.as_deref(), )? @@ -1963,8 +1965,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { || CodeGenError::VariableNotFound(format!("{val_expr:?}")), )?; let mod_hint = self.take_module_hint(); - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, None, mod_hint.as_deref(), )? @@ -2073,8 +2077,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { || CodeGenError::VariableNotFound(format!("{val_expr:?}")), )?; let mod_hint = self.take_module_hint(); - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, None, mod_hint.as_deref(), )? @@ -4263,22 +4269,20 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { match self.query_dwarf_for_variable(var_name)? { Some(var_info) => { info!( - "Found DWARF variable: {} = {:?}", - var_name, var_info.location + "Found DWARF variable read plan: {} availability={:?}", + var_name, var_info.availability ); // Require DWARF type information - let dwarf_type = var_info.dwarf_type.as_ref().ok_or_else(|| { + var_info.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError(format!( "Variable '{var_name}' has no type information in DWARF" )) })?; let compile_context = self.get_compile_time_context()?; - self.variable_location_to_llvm_value( - &var_info.location, - dwarf_type, - var_name, + self.variable_read_plan_to_llvm_value( + &var_info, compile_context.pc_address, status_ptr, ) diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index 5ef7e9fb..8869cd78 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -5,8 +5,9 @@ use super::context::{CodeGenError, EbpfContext, Result}; use ghostscope_dwarf::{ - AddressExpr, AddressOrigin, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, - PlannedAddress, SectionType, TypeInfo, VariableAccessPath, VariableAccessSegment, + semantics::{add_location_offset, dereference_location}, + AddressOrigin, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, PlannedAddress, + PlannedAddressKind, SectionType, TypeInfo, VariableAccessPath, VariableAccessSegment, VariableLocation, VariableMaterializationPlan, VariableReadPlan, }; use ghostscope_process::module_probe; @@ -62,65 +63,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { TypeInfo::StructType { .. } | TypeInfo::UnionType { .. } | TypeInfo::ArrayType { .. } ) } - /// Lower a semantic DWARF variable location to an LLVM value. - pub fn variable_location_to_llvm_value( - &mut self, - location: &VariableLocation, - dwarf_type: &TypeInfo, - var_name: &str, - pc_address: u64, - status_ptr: Option>, - ) -> Result> { - debug!( - "Converting VariableLocation to LLVM value for variable: {}", - var_name - ); - debug!("Evaluation context PC address: 0x{:x}", pc_address); - - if let Some(value) = ghostscope_dwarf::PlannedValue::from_location(location.clone()) { - return self - .planned_value_to_llvm_value(&value, dwarf_type, var_name, pc_address, status_ptr); - } - - match location { - VariableLocation::AbsoluteAddressValue(_) - | VariableLocation::RegisterValue { .. } - | VariableLocation::ComputedValue(_) - | VariableLocation::ImplicitValue(_) => Err(CodeGenError::DwarfError(format!( - "Direct DWARF value '{var_name}' could not be materialized as a planned value" - ))), - VariableLocation::Address(_) - | VariableLocation::RegisterAddress { .. } - | VariableLocation::ComputedAddress(_) => { - self.generate_memory_location(location, dwarf_type, status_ptr) - } - VariableLocation::OptimizedOut => { - debug!("Variable {} is optimized out", var_name); - Err(Self::dwarf_expression_unavailable_error( - var_name, - &Availability::OptimizedOut, - pc_address, - )) - } - VariableLocation::Pieces(pieces) => { - debug!( - "Variable {} is composite with {} pieces", - var_name, - pieces.len() - ); - Err(CodeGenError::DwarfError(format!( - "DWARF variable '{var_name}' is split across pieces; piece reconstruction is not implemented" - ))) - } - VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( - "Frame-base-relative variable plan requires resolved frame base".to_string(), - )), - VariableLocation::Unknown => Err(CodeGenError::DwarfError( - "Variable read plan has unknown location".to_string(), - )), - } - } - fn planned_value_to_llvm_value( &mut self, value: &ghostscope_dwarf::PlannedValue, @@ -177,36 +119,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - /// Variant that allows passing an explicit module hint for offsets lookup - pub fn variable_location_to_address_with_hint( - &mut self, - location: &VariableLocation, - status_ptr: Option>, - module_hint: Option<&str>, - ) -> Result> { - let Some(address) = PlannedAddress::from_location(location.clone()) else { - return match location { - VariableLocation::OptimizedOut => { - let pc_address = self - .current_compile_time_context - .as_ref() - .map(|ctx| ctx.pc_address) - .unwrap_or(0); - Err(Self::dwarf_expression_unavailable_error( - "DWARF address expression", - &Availability::OptimizedOut, - pc_address, - )) - } - _ => Err(CodeGenError::NotImplemented( - "Unable to compute address from variable location".to_string(), - )), - }; - }; - - self.planned_address_to_llvm_address(&address, status_ptr, module_hint) - } - pub fn planned_address_to_llvm_address( &mut self, address: &PlannedAddress, @@ -265,11 +177,11 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { pt_regs_ptr: PointerValue<'ctx>, status_ptr: Option>, ) -> Result> { - match &address.location { - VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { - self.address_steps_to_unrebased_address(&expr.steps, pt_regs_ptr, status_ptr) + match &address.kind { + PlannedAddressKind::Constant { address } => { + Ok(self.context.i64_type().const_int(*address, false)) } - VariableLocation::RegisterAddress { dwarf_reg, offset } => { + PlannedAddressKind::RegisterOffset { dwarf_reg, offset } => { let reg_val = self.load_register_value(*dwarf_reg, pt_regs_ptr)?; if let BasicValueEnum::IntValue(reg_i) = reg_val { if *offset != 0 { @@ -286,11 +198,11 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { )) } } - VariableLocation::ComputedAddress(steps) => { + PlannedAddressKind::Computed { steps } => { self.address_steps_to_unrebased_address(steps, pt_regs_ptr, status_ptr) } - _ => Err(CodeGenError::NotImplemented( - "Unable to compute address from planned address".to_string(), + PlannedAddressKind::FrameBaseRelative { .. } => Err(CodeGenError::NotImplemented( + "Frame-base-relative planned address requires resolved frame base".to_string(), )), } } @@ -519,19 +431,74 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - /// Generate LLVM IR for memory-backed variable locations. - fn generate_memory_location( + pub(super) fn variable_read_plan_to_llvm_value( &mut self, - location: &VariableLocation, - dwarf_type: &TypeInfo, + plan: &VariableReadPlan, + pc_address: u64, status_ptr: Option>, ) -> Result> { - let address = PlannedAddress::from_location(location.clone()).ok_or_else(|| { - CodeGenError::DwarfError( - "Variable location cannot be materialized as an address".into(), - ) - })?; - self.generate_memory_location_from_planned_address(&address, dwarf_type, status_ptr) + let materialized = self.variable_read_plan_to_materialization(plan.clone(), pc_address)?; + self.variable_materialization_to_llvm_value(&materialized, pc_address, status_ptr) + } + + pub(super) fn variable_read_plan_to_lvalue_address_with_hint( + &mut self, + plan: &VariableReadPlan, + pc_address: u64, + status_ptr: Option>, + module_hint: Option<&str>, + ) -> Result> { + if !plan.availability.is_available() { + return Err(Self::dwarf_expression_unavailable_error( + &plan.name, + &plan.availability, + pc_address, + )); + } + + let address = match &plan.location { + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::FrameBaseRelative { .. } + | VariableLocation::ComputedAddress(_) => { + PlannedAddress::from_location(plan.location.clone()).ok_or_else(|| { + CodeGenError::DwarfError(format!( + "DWARF variable '{}' has an address-backed location that could not be planned", + plan.name + )) + })? + } + VariableLocation::OptimizedOut => { + return Err(Self::dwarf_expression_unavailable_error( + &plan.name, + &Availability::OptimizedOut, + pc_address, + )) + } + VariableLocation::Pieces(_) => { + return Err(CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + plan.name + ))) + } + VariableLocation::AbsoluteAddressValue(_) + | VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) => { + return Err(CodeGenError::DwarfError(format!( + "cannot take address of value-backed DWARF expression '{}'", + plan.name + ))) + } + VariableLocation::Unknown => { + return Err(CodeGenError::DwarfError(format!( + "DWARF variable '{}' has unknown location", + plan.name + ))) + } + }; + + self.planned_address_to_llvm_address(&address, status_ptr, module_hint) } fn generate_memory_location_from_planned_address( @@ -1459,123 +1426,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - /// Helper: Compute pointer dereference - fn compute_pointer_dereference( - &self, - ptr_location: &VariableLocation, - ) -> Result { - match ptr_location { - VariableLocation::AbsoluteAddressValue(expr) => { - Ok(VariableLocation::Address(expr.clone())) - } - VariableLocation::Address(_) - | VariableLocation::RegisterAddress { .. } - | VariableLocation::ComputedAddress(_) => { - let mut steps = self.variable_location_to_compute_steps(ptr_location)?; - steps.push(ComputeStep::Dereference { - size: MemoryAccessSize::U64, - }); - Ok(VariableLocation::ComputedAddress(steps)) - } - VariableLocation::RegisterValue { dwarf_reg } => { - Ok(VariableLocation::RegisterAddress { - dwarf_reg: *dwarf_reg, - offset: 0, - }) - } - VariableLocation::ComputedValue(steps) => { - Ok(VariableLocation::ComputedAddress(steps.clone())) - } - VariableLocation::ImplicitValue(bytes) => { - let mut value: u64 = 0; - for (i, byte) in bytes.iter().take(8).enumerate() { - value |= (*byte as u64) << (8 * i); - } - Ok(VariableLocation::Address(AddressExpr::constant(value))) - } - _ => Err(CodeGenError::NotImplemented( - "Unsupported pointer dereference scenario".to_string(), - )), - } - } - - /// Helper: Convert location to compute steps - fn variable_location_to_compute_steps( - &self, - location: &VariableLocation, - ) -> Result> { - match location { - VariableLocation::Address(expr) => Ok(expr.steps.clone()), - VariableLocation::AbsoluteAddressValue(expr) => Ok(expr.steps.clone()), - VariableLocation::RegisterAddress { dwarf_reg, offset } => { - let mut steps = vec![ComputeStep::LoadRegister(*dwarf_reg)]; - if *offset != 0 { - steps.push(ComputeStep::PushConstant(*offset)); - steps.push(ComputeStep::Add); - } - Ok(steps) - } - VariableLocation::ComputedAddress(steps) | VariableLocation::ComputedValue(steps) => { - Ok(steps.clone()) - } - VariableLocation::RegisterValue { dwarf_reg } => { - Ok(vec![ComputeStep::LoadRegister(*dwarf_reg)]) - } - VariableLocation::ImplicitValue(bytes) => { - let mut value: u64 = 0; - for (i, byte) in bytes.iter().take(8).enumerate() { - value |= (*byte as u64) << (8 * i); - } - Ok(vec![ComputeStep::PushConstant(value as i64)]) - } - _ => Err(CodeGenError::NotImplemented( - "Unable to convert variable location to compute steps".to_string(), - )), - } - } - - fn add_variable_location_offset( - &self, - location: VariableLocation, - offset: i64, - ) -> Result { - if offset == 0 { - return Ok(location); - } - - match location { - VariableLocation::RegisterAddress { - dwarf_reg, - offset: base_offset, - } => Ok(VariableLocation::RegisterAddress { - dwarf_reg, - offset: base_offset.saturating_add(offset), - }), - VariableLocation::Address(expr) => { - let mut steps = expr.steps; - steps.push(ComputeStep::PushConstant(offset)); - steps.push(ComputeStep::Add); - Ok(VariableLocation::ComputedAddress(steps)) - } - VariableLocation::AbsoluteAddressValue(expr) => { - let mut steps = expr.steps; - steps.push(ComputeStep::PushConstant(offset)); - steps.push(ComputeStep::Add); - Ok(VariableLocation::AbsoluteAddressValue(AddressExpr { - steps, - })) - } - VariableLocation::ComputedAddress(mut steps) => { - steps.push(ComputeStep::PushConstant(offset)); - steps.push(ComputeStep::Add); - Ok(VariableLocation::ComputedAddress(steps)) - } - _ => Err(CodeGenError::NotImplemented( - "Unable to apply pointer arithmetic to variable location".to_string(), - )), - } - } - /// Compute a typed pointed-to location for expressions like `ptr +/- K` where K is an element index. /// Returns a computed location along with the pointed-to DWARF type. /// The offset is scaled by the element size of the pointer/array target type. @@ -1635,9 +1485,11 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } }; - let base_location = self.compute_pointer_dereference(&ptr_var.location)?; + let base_location = dereference_location(&ptr_var.location) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; let byte_offset = index.saturating_mul(elem_size as i64); - let location = self.add_variable_location_offset(base_location, byte_offset)?; + let location = add_location_offset(base_location, byte_offset) + .map_err(|err| CodeGenError::DwarfError(err.to_string()))?; Ok((location, elem_ty)) } @@ -1647,9 +1499,35 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { mod tests { use super::*; use crate::script::Expr; + use ghostscope_dwarf::AddressExpr; use ghostscope_dwarf::Provenance; use inkwell::context::Context as LlvmContext; + fn read_plan( + name: &str, + type_name: &str, + dwarf_type: Option, + location: VariableLocation, + availability: Availability, + ) -> VariableReadPlan { + VariableReadPlan { + name: name.to_string(), + type_name: type_name.to_string(), + access_path: VariableAccessPath::default(), + dwarf_type, + declaration: None, + type_id: None, + location, + availability, + scope_depth: 0, + is_parameter: false, + is_artificial: false, + pc_range: None, + inline_context: None, + provenance: Provenance::DirectDie, + } + } + #[test] fn access_path_from_expr_flattens_member_array_member_paths() { let expr = Expr::MemberAccess( @@ -1746,8 +1624,15 @@ mod tests { members: vec![], }; let location = VariableLocation::Address(AddressExpr::constant(0x1000)); + let plan = read_plan( + "S", + "S", + Some(st), + location.clone(), + Availability::Available, + ); let v = ctx - .variable_location_to_llvm_value(&location, &st, "S", 0, None) + .variable_read_plan_to_llvm_value(&plan, 0, None) .expect("eval"); match v { BasicValueEnum::PointerValue(_) => {} @@ -1764,8 +1649,9 @@ mod tests { element_count: Some(4), total_size: Some(16), }; + let plan = read_plan("A", "int[4]", Some(arr), location, Availability::Available); let v2 = ctx - .variable_location_to_llvm_value(&location, &arr, "A", 0, None) + .variable_read_plan_to_llvm_value(&plan, 0, None) .expect("eval2"); match v2 { BasicValueEnum::PointerValue(_) => {} @@ -1793,8 +1679,9 @@ mod tests { encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, }; let location = VariableLocation::Address(AddressExpr::constant(0x2000)); + let plan = read_plan("x", "int", Some(bt), location, Availability::Available); let v = ctx - .variable_location_to_llvm_value(&location, &bt, "x", 0, None) + .variable_read_plan_to_llvm_value(&plan, 0, None) .expect("eval"); match v { BasicValueEnum::IntValue(_) => {} @@ -1825,14 +1712,20 @@ mod tests { size: 8, }; let location = VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x2000)); + let plan = read_plan( + "ptr", + "int*", + Some(ptr_ty), + location.clone(), + Availability::Available, + ); let value = ctx - .variable_location_to_llvm_value(&location, &ptr_ty, "ptr", 0, None) + .variable_read_plan_to_llvm_value(&plan, 0, None) .expect("absolute address value should lower"); assert!(matches!(value, BasicValueEnum::IntValue(_))); - let pointee = ctx - .compute_pointer_dereference(&location) + let pointee = dereference_location(&location) .expect("absolute address value should dereference to memory"); assert_eq!( pointee, @@ -1852,15 +1745,16 @@ mod tests { size: 4, encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, }; + let plan = read_plan( + "x", + "int", + Some(ty), + VariableLocation::OptimizedOut, + Availability::OptimizedOut, + ); let err = ctx - .variable_location_to_llvm_value( - &VariableLocation::OptimizedOut, - &ty, - "x", - 0x1234, - None, - ) + .variable_read_plan_to_llvm_value(&plan, 0x1234, None) .expect_err("optimized value should not lower to a placeholder"); assert!( @@ -1888,9 +1782,10 @@ mod tests { bit_size: 32, location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), }]); + let plan = read_plan("split", "int", Some(ty), location, Availability::Available); let err = ctx - .variable_location_to_llvm_value(&location, &ty, "split", 0x1234, None) + .variable_read_plan_to_llvm_value(&plan, 0x1234, None) .expect_err("split pieces should not silently use the first piece"); assert!(matches!(err, CodeGenError::DwarfError(_))); @@ -2022,9 +1917,67 @@ mod tests { ComputeStep::Add, ]); + let address = PlannedAddress::from_location(location) + .expect("computed location should materialize as a planned address"); let addr = ctx - .variable_location_to_address_with_hint(&location, None, None) + .planned_address_to_llvm_address(&address, None, None) .expect("computed address with mid-stream dereference should compile"); assert_eq!(addr.get_type().get_bit_width(), 64); } + + #[test] + fn lvalue_address_read_plan_does_not_require_dwarf_type() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "untyped_lvalue_addr", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + ctx.__test_ensure_proc_offsets_map().expect("map"); + ctx.__test_alloc_pm_key().expect("pm_key"); + ctx.set_compile_time_context(0x1234, "/nonexistent/module".to_string()); + + let plan = read_plan( + "untyped", + "", + None, + VariableLocation::Address(AddressExpr::constant(0x1000)), + Availability::Available, + ); + + let addr = ctx + .variable_read_plan_to_lvalue_address_with_hint(&plan, 0x1234, None, None) + .expect("address-only read plan should not require DWARF type info"); + + assert_eq!(addr.get_type().get_bit_width(), 64); + } + + #[test] + fn lvalue_address_rejects_absolute_address_values() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "value_backed_lvalue", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + + let ty = ghostscope_protocol::TypeInfo::PointerType { + target_type: Box::new(ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }), + size: 8, + }; + let plan = read_plan( + "ptr", + "int*", + Some(ty), + VariableLocation::AbsoluteAddressValue(AddressExpr::constant(0x2000)), + Availability::Available, + ); + + let err = ctx + .variable_read_plan_to_lvalue_address_with_hint(&plan, 0x1234, None, None) + .expect_err("value-backed locations should not support address-of"); + + assert!(matches!(err, CodeGenError::DwarfError(_))); + assert!(err.to_string().contains("value-backed")); + } } diff --git a/ghostscope-compiler/src/ebpf/expression.rs b/ghostscope-compiler/src/ebpf/expression.rs index e6393ff7..0294d095 100644 --- a/ghostscope-compiler/src/ebpf/expression.rs +++ b/ghostscope-compiler/src/ebpf/expression.rs @@ -374,8 +374,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - return self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + return self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, status_ptr, module_hint.as_deref(), ); @@ -403,8 +405,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - return self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + return self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, status_ptr, module_hint.as_deref(), ); @@ -467,13 +471,9 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } match dty { DwarfType::PointerType { .. } => { - let val_any = self.variable_location_to_llvm_value( - &var.location, - dty, - &var.name, - self.get_compile_time_context()?.pc_address, - None, - )?; + let pc_address = self.get_compile_time_context()?.pc_address; + let val_any = + self.variable_read_plan_to_llvm_value(&var, pc_address, None)?; match val_any { IntValue(iv) => Ok(iv), PointerValue(pv) => self @@ -493,8 +493,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, status_ptr, module_hint.as_deref(), ) @@ -510,8 +512,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, status_ptr, module_hint.as_deref(), ) @@ -1103,13 +1107,9 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } match ty { DwarfType::PointerType { .. } => { - let val_any = self.variable_location_to_llvm_value( - &var.location, - ty, - &var.name, - self.get_compile_time_context()?.pc_address, - None, - )?; + let pc_address = self.get_compile_time_context()?.pc_address; + let val_any = + self.variable_read_plan_to_llvm_value(&var, pc_address, None)?; match val_any { BasicValueEnum::IntValue(iv) => iv, BasicValueEnum::PointerValue(pv) => self @@ -1130,8 +1130,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { None }; - self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, status_ptr, module_hint.as_deref(), )? @@ -1733,8 +1735,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { ) })?; let module_hint = self.current_resolved_var_module_path.clone(); - match self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + match self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, None, module_hint.as_deref(), ) { @@ -1768,8 +1772,10 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { })?; // Use current resolved hint if available (set during DWARF resolution) let module_hint = self.current_resolved_var_module_path.clone(); - match self.variable_location_to_address_with_hint( - &var.location, + let pc_address = self.get_compile_time_context()?.pc_address; + match self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, None, module_hint.as_deref(), ) { @@ -2417,19 +2423,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { cur } - // Compute runtime address of the DWARF expression - let module_hint = self.current_resolved_var_module_path.clone(); - let status_ptr = if self.condition_context_active { - Some(self.get_or_create_cond_error_global()) - } else { - None - }; - let addr = self.variable_location_to_address_with_hint( - &var.location, - status_ptr, - module_hint.as_deref(), - )?; - let lit_bytes = lit.as_bytes(); let lit_len = lit_bytes.len() as u32; let one = self.context.bool_type().const_int(1, false); @@ -2449,13 +2442,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } // Evaluate expression to pointer value and read up to L+1 bytes - let val_any = self.variable_location_to_llvm_value( - &var.location, - var.dwarf_type.as_ref().unwrap(), - &var.name, - self.get_compile_time_context()?.pc_address, - None, - )?; + let pc_address = self.get_compile_time_context()?.pc_address; + let val_any = self.variable_read_plan_to_llvm_value(&var, pc_address, None)?; let ptr_i64 = match val_any { BasicValueEnum::IntValue(iv) => iv, BasicValueEnum::PointerValue(pv) => self @@ -2577,6 +2565,19 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Return const false (or true if '!=' requested) return Ok((if is_equal { zero } else { one }).into()); } + let module_hint = self.current_resolved_var_module_path.clone(); + let status_ptr = if self.condition_context_active { + Some(self.get_or_create_cond_error_global()) + } else { + None + }; + let pc_address = self.get_compile_time_context()?.pc_address; + let addr = self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, + status_ptr, + module_hint.as_deref(), + )?; // Read exactly L+1 bytes let (buf_global, status, arr_ty) = self.read_user_bytes_into_buffer(addr, lit_len + 1, "_gs_arrbuf")?; @@ -2661,6 +2662,19 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { .map_err(|e| CodeGenError::Builder(e.to_string()))? } None => { + let module_hint = self.current_resolved_var_module_path.clone(); + let status_ptr = if self.condition_context_active { + Some(self.get_or_create_cond_error_global()) + } else { + None + }; + let pc_address = self.get_compile_time_context()?.pc_address; + let addr = self.variable_read_plan_to_lvalue_address_with_hint( + &var, + pc_address, + status_ptr, + module_hint.as_deref(), + )?; // Fallback using type_name string match parse_type_name(&var.type_name) { ParsedKind::PtrChar => { diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index 7bc66600..429e5adf 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -36,11 +36,11 @@ pub use core::{ // Re-export semantic contract types. pub use semantics::{ AddressOrigin, AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, - CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, PlannedValue, - RegisterRecoveryPlan, UnwindDiagnostic, UnwindDiagnosticKind, VariableAccessPath, - VariableAccessSegment, VariableLoweringKind, VariableLoweringPlan, VariableMaterialization, - VariableMaterializationPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, - VisibleVariable, VisibleVariablesResult, + CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, + PlannedAddressKind, PlannedValue, RegisterRecoveryPlan, UnwindDiagnostic, UnwindDiagnosticKind, + VariableAccessPath, VariableAccessSegment, VariableLoweringKind, VariableLoweringPlan, + VariableMaterialization, VariableMaterializationPlan, VariablePlan, VariableQueryDiagnostic, + VariableReadPlan, VisibleVariable, VisibleVariablesResult, }; // Re-export type definitions from protocol (avoiding circular dependencies) diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs index 865c05a4..e72a7d39 100644 --- a/ghostscope-dwarf/src/semantics/variable_plan.rs +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -69,10 +69,18 @@ pub enum AddressOrigin { #[derive(Debug, Clone, PartialEq)] pub struct PlannedAddress { - pub location: VariableLocation, + pub kind: PlannedAddressKind, pub origin: AddressOrigin, } +#[derive(Debug, Clone, PartialEq)] +pub enum PlannedAddressKind { + Constant { address: u64 }, + RegisterOffset { dwarf_reg: u16, offset: i64 }, + FrameBaseRelative { offset: i64 }, + Computed { steps: Vec }, +} + #[derive(Debug, Clone, PartialEq)] pub enum PlannedValue { Constant(i64), @@ -492,13 +500,23 @@ impl PlannedValue { impl PlannedAddress { pub fn from_location(location: VariableLocation) -> Option { - let origin = match &location { + let (kind, origin) = match location { VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { - address_origin_for_steps(&expr.steps) + let origin = address_origin_for_steps(&expr.steps); + (PlannedAddressKind::from_steps(expr.steps), origin) + } + VariableLocation::RegisterAddress { dwarf_reg, offset } => ( + PlannedAddressKind::RegisterOffset { dwarf_reg, offset }, + AddressOrigin::RuntimeDerived, + ), + VariableLocation::FrameBaseRelative { offset } => ( + PlannedAddressKind::FrameBaseRelative { offset }, + AddressOrigin::RuntimeDerived, + ), + VariableLocation::ComputedAddress(steps) => { + let origin = address_origin_for_steps(&steps); + (PlannedAddressKind::from_steps(steps), origin) } - VariableLocation::RegisterAddress { .. } - | VariableLocation::FrameBaseRelative { .. } => AddressOrigin::RuntimeDerived, - VariableLocation::ComputedAddress(steps) => address_origin_for_steps(steps), VariableLocation::RegisterValue { .. } | VariableLocation::ComputedValue(_) | VariableLocation::ImplicitValue(_) @@ -507,16 +525,13 @@ impl PlannedAddress { | VariableLocation::Unknown => return None, }; - Some(Self { location, origin }) + Some(Self { kind, origin }) } pub fn constant_link_time_address(&self) -> Option { - match (&self.origin, &self.location) { - (AddressOrigin::LinkTime, VariableLocation::Address(expr)) - | (AddressOrigin::LinkTime, VariableLocation::AbsoluteAddressValue(expr)) => { - fold_constant_steps(&expr.steps) - } - (AddressOrigin::LinkTime, VariableLocation::ComputedAddress(steps)) => { + match (&self.origin, &self.kind) { + (AddressOrigin::LinkTime, PlannedAddressKind::Constant { address }) => Some(*address), + (AddressOrigin::LinkTime, PlannedAddressKind::Computed { steps }) => { fold_constant_steps(steps) } _ => None, @@ -528,16 +543,22 @@ impl PlannedAddress { return None; } - match &self.location { - VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { - link_time_base_and_runtime_tail(&expr.steps) - } - VariableLocation::ComputedAddress(steps) => link_time_base_and_runtime_tail(steps), + match &self.kind { + PlannedAddressKind::Computed { steps } => link_time_base_and_runtime_tail(steps), _ => None, } } } +impl PlannedAddressKind { + fn from_steps(steps: Vec) -> Self { + match fold_constant_steps(&steps) { + Some(address) => Self::Constant { address }, + None => Self::Computed { steps }, + } + } +} + impl RuntimeCapabilities { pub fn supports_requirement(&self, requirement: &RuntimeRequirement) -> bool { match requirement { @@ -895,7 +916,8 @@ fn unknown_member_error( .into() } -fn add_location_offset(location: VariableLocation, offset: i64) -> Result { +/// Apply a byte offset to an address-backed source variable location. +pub fn add_location_offset(location: VariableLocation, offset: i64) -> Result { match location { VariableLocation::Address(expr) => { Ok(VariableLocation::Address(offset_address_expr(expr, offset))) @@ -944,7 +966,8 @@ fn push_add_offset(steps: &mut Vec, offset: i64) { } } -fn dereference_location(location: &VariableLocation) -> Result { +/// Turn a pointer-valued source variable location into its pointee location. +pub fn dereference_location(location: &VariableLocation) -> Result { match location { VariableLocation::AbsoluteAddressValue(expr) => Ok(VariableLocation::Address(expr.clone())), VariableLocation::RegisterValue { dwarf_reg } => { @@ -1100,6 +1123,33 @@ mod tests { VariableMaterialization::UserMemoryRead { address } => { assert_eq!(address.origin, AddressOrigin::LinkTime); assert_eq!(address.constant_link_time_address(), Some(0x1000)); + assert_eq!( + address.kind, + PlannedAddressKind::Constant { address: 0x1000 } + ); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_converts_register_address_to_address_kind() { + let plan = read_plan(VariableLocation::RegisterAddress { + dwarf_reg: 6, + offset: -16, + }); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::RuntimeDerived); + assert_eq!( + address.kind, + PlannedAddressKind::RegisterOffset { + dwarf_reg: 6, + offset: -16 + } + ); } other => panic!("unexpected materialization: {other:?}"), } @@ -1178,6 +1228,7 @@ mod tests { address: PlannedAddress { origin: AddressOrigin::LinkTime, + kind: PlannedAddressKind::Constant { address: 0x2000 }, .. }, },