From 33c28fd5e1c9e08863d6611beb1909711a86ddc6 Mon Sep 17 00:00:00 2001 From: swananan Date: Mon, 4 May 2026 21:03:44 +0800 Subject: [PATCH] refactor: narrow dwarf direct materialization Introduce PlannedValue as the direct-value primitive emitted by DWARF materialization plans. This keeps constants, register values, computed values, implicit bytes, and address values explicit without exposing the full VariableLocation shape to compiler direct-value lowering. Route compiler direct-value codegen through PlannedValue while keeping the legacy VariableLocation helper as a compatibility entry point. This makes the materialization contract closer to the USDT-style operand model and leaves address IR compaction for a later step. Refs #148. --- ghostscope-compiler/src/ebpf/dwarf_bridge.rs | 106 +++++++++------ ghostscope-dwarf/src/lib.rs | 2 +- .../src/semantics/variable_plan.rs | 128 +++++++++++++----- 3 files changed, 164 insertions(+), 72 deletions(-) diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index d9e950fd..5ef7e9fb 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -77,15 +77,71 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { ); debug!("Evaluation context PC address: 0x{:x}", pc_address); - let pt_regs_ptr = self.get_pt_regs_parameter()?; - let result_size = MemoryAccessSize::from_size(Self::get_dwarf_type_size(dwarf_type)); + if let Some(value) = ghostscope_dwarf::PlannedValue::from_location(location.clone()) { + return self + .planned_value_to_llvm_value(&value, dwarf_type, var_name, pc_address, status_ptr); + } match location { - VariableLocation::RegisterValue { dwarf_reg } => { + VariableLocation::AbsoluteAddressValue(_) + | VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) => Err(CodeGenError::DwarfError(format!( + "Direct DWARF value '{var_name}' could not be materialized as a planned value" + ))), + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::ComputedAddress(_) => { + self.generate_memory_location(location, dwarf_type, status_ptr) + } + VariableLocation::OptimizedOut => { + debug!("Variable {} is optimized out", var_name); + Err(Self::dwarf_expression_unavailable_error( + var_name, + &Availability::OptimizedOut, + pc_address, + )) + } + VariableLocation::Pieces(pieces) => { + debug!( + "Variable {} is composite with {} pieces", + var_name, + pieces.len() + ); + Err(CodeGenError::DwarfError(format!( + "DWARF variable '{var_name}' is split across pieces; piece reconstruction is not implemented" + ))) + } + VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( + "Frame-base-relative variable plan requires resolved frame base".to_string(), + )), + VariableLocation::Unknown => Err(CodeGenError::DwarfError( + "Variable read plan has unknown location".to_string(), + )), + } + } + + fn planned_value_to_llvm_value( + &mut self, + value: &ghostscope_dwarf::PlannedValue, + dwarf_type: &TypeInfo, + var_name: &str, + _pc_address: u64, + status_ptr: Option>, + ) -> Result> { + let pt_regs_ptr = self.get_pt_regs_parameter()?; + let result_size = MemoryAccessSize::from_size(Self::get_dwarf_type_size(dwarf_type)); + match value { + ghostscope_dwarf::PlannedValue::Constant(value) => Ok(self + .context + .i64_type() + .const_int(*value as u64, true) + .into()), + ghostscope_dwarf::PlannedValue::RegisterValue { dwarf_reg } => { debug!("Generating register value: {dwarf_reg}"); self.load_register_value(*dwarf_reg, pt_regs_ptr) } - VariableLocation::ComputedValue(steps) => { + ghostscope_dwarf::PlannedValue::ComputedValue { steps } => { debug!("Generating computed value: {} steps", steps.len()); let runtime_status_ptr = if self.condition_context_active { Some(self.get_or_create_cond_error_global()) @@ -100,7 +156,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { None, ) } - VariableLocation::ImplicitValue(bytes) => { + ghostscope_dwarf::PlannedValue::ImplicitBytes(bytes) => { debug!("Generating implicit value: {} bytes", bytes.len()); let mut value: u64 = 0; for (i, &byte) in bytes.iter().enumerate().take(8) { @@ -108,44 +164,16 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } Ok(self.context.i64_type().const_int(value, false).into()) } - VariableLocation::AbsoluteAddressValue(_) => { + ghostscope_dwarf::PlannedValue::AddressValue { address } => { + debug!("Generating address direct value for variable: {var_name}"); let runtime_status_ptr = if self.condition_context_active { Some(self.get_or_create_cond_error_global()) } else { status_ptr }; - self.variable_location_to_address_with_hint(location, runtime_status_ptr, None) + self.planned_address_to_llvm_address(address, runtime_status_ptr, None) .map(Into::into) } - VariableLocation::Address(_) - | VariableLocation::RegisterAddress { .. } - | VariableLocation::ComputedAddress(_) => { - self.generate_memory_location(location, dwarf_type, status_ptr) - } - VariableLocation::OptimizedOut => { - debug!("Variable {} is optimized out", var_name); - Err(Self::dwarf_expression_unavailable_error( - var_name, - &Availability::OptimizedOut, - pc_address, - )) - } - VariableLocation::Pieces(pieces) => { - debug!( - "Variable {} is composite with {} pieces", - var_name, - pieces.len() - ); - Err(CodeGenError::DwarfError(format!( - "DWARF variable '{var_name}' is split across pieces; piece reconstruction is not implemented" - ))) - } - VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( - "Frame-base-relative variable plan requires resolved frame base".to_string(), - )), - VariableLocation::Unknown => Err(CodeGenError::DwarfError( - "Variable read plan has unknown location".to_string(), - )), } } @@ -453,14 +481,14 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { status_ptr: Option>, ) -> Result> { match &materialization.materialization { - ghostscope_dwarf::VariableMaterialization::DirectValue { location, .. } => { + ghostscope_dwarf::VariableMaterialization::DirectValue { value } => { let dwarf_type = materialization.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError( "Expression has no DWARF type information".to_string(), ) })?; - self.variable_location_to_llvm_value( - location, + self.planned_value_to_llvm_value( + value, dwarf_type, &materialization.name, pc_address, diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index 4b6bbd57..7bc66600 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -36,7 +36,7 @@ pub use core::{ // Re-export semantic contract types. pub use semantics::{ AddressOrigin, AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, - CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, + CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, PlannedValue, RegisterRecoveryPlan, UnwindDiagnostic, UnwindDiagnosticKind, VariableAccessPath, VariableAccessSegment, VariableLoweringKind, VariableLoweringPlan, VariableMaterialization, VariableMaterializationPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs index 9080c194..865c05a4 100644 --- a/ghostscope-dwarf/src/semantics/variable_plan.rs +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -73,21 +73,21 @@ pub struct PlannedAddress { pub origin: AddressOrigin, } +#[derive(Debug, Clone, PartialEq)] +pub enum PlannedValue { + Constant(i64), + RegisterValue { dwarf_reg: u16 }, + ComputedValue { steps: Vec }, + ImplicitBytes(Vec), + AddressValue { address: PlannedAddress }, +} + #[derive(Debug, Clone, PartialEq)] pub enum VariableMaterialization { - DirectValue { - location: VariableLocation, - address_origin: Option, - }, - UserMemoryRead { - address: PlannedAddress, - }, - Composite { - pieces: Vec, - }, - Unavailable { - availability: Availability, - }, + DirectValue { value: PlannedValue }, + UserMemoryRead { address: PlannedAddress }, + Composite { pieces: Vec }, + Unavailable { availability: Availability }, } #[derive(Debug, Clone, PartialEq)] @@ -288,10 +288,21 @@ impl VariableReadPlan { } } else { match lowering.kind { - VariableLoweringKind::DirectValue => VariableMaterialization::DirectValue { - address_origin: direct_value_address_origin(&self.location), - location: self.location.clone(), - }, + VariableLoweringKind::DirectValue => { + match PlannedValue::from_location(self.location.clone()) { + Some(value) => VariableMaterialization::DirectValue { value }, + None => VariableMaterialization::Unavailable { + availability: Availability::Unsupported( + UnsupportedReason::ExpressionShape { + detail: format!( + "location {} cannot be materialized as a direct value", + self.location + ), + }, + ), + }, + } + } VariableLoweringKind::UserMemoryRead => { match PlannedAddress::from_location(self.location.clone()) { Some(address) => VariableMaterialization::UserMemoryRead { address }, @@ -450,6 +461,35 @@ impl VariableReadPlan { } } +impl PlannedValue { + pub fn from_location(location: VariableLocation) -> Option { + match location { + VariableLocation::RegisterValue { dwarf_reg } => { + Some(Self::RegisterValue { dwarf_reg }) + } + VariableLocation::ComputedValue(steps) => { + if let [ComputeStep::PushConstant(value)] = steps.as_slice() { + Some(Self::Constant(*value)) + } else { + Some(Self::ComputedValue { steps }) + } + } + VariableLocation::ImplicitValue(bytes) => Some(Self::ImplicitBytes(bytes)), + VariableLocation::AbsoluteAddressValue(expr) => { + PlannedAddress::from_location(VariableLocation::AbsoluteAddressValue(expr)) + .map(|address| Self::AddressValue { address }) + } + VariableLocation::Address(_) + | VariableLocation::RegisterAddress { .. } + | VariableLocation::FrameBaseRelative { .. } + | VariableLocation::ComputedAddress(_) + | VariableLocation::Pieces(_) + | VariableLocation::OptimizedOut + | VariableLocation::Unknown => None, + } + } +} + impl PlannedAddress { pub fn from_location(location: VariableLocation) -> Option { let origin = match &location { @@ -512,13 +552,6 @@ impl RuntimeCapabilities { } } -fn direct_value_address_origin(location: &VariableLocation) -> Option { - match location { - VariableLocation::AbsoluteAddressValue(expr) => Some(address_origin_for_steps(&expr.steps)), - _ => None, - } -} - fn address_origin_for_steps(steps: &[ComputeStep]) -> AddressOrigin { if fold_constant_steps(steps).is_some() { return AddressOrigin::LinkTime; @@ -1140,19 +1173,50 @@ mod tests { match materialized.materialization { VariableMaterialization::DirectValue { - address_origin, - location, - } => { - assert_eq!(address_origin, Some(AddressOrigin::LinkTime)); - assert!(matches!( - location, - VariableLocation::AbsoluteAddressValue(_) - )); + value: + PlannedValue::AddressValue { + address: + PlannedAddress { + origin: AddressOrigin::LinkTime, + .. + }, + }, + } => {} + VariableMaterialization::DirectValue { value } => { + panic!("unexpected direct value: {value:?}"); } other => panic!("unexpected materialization: {other:?}"), } } + #[test] + fn materialization_plan_converts_constant_direct_value() { + let plan = read_plan(VariableLocation::ComputedValue(vec![ + ComputeStep::PushConstant(42), + ])); + let materialized = plan.materialization_plan(&capabilities(false)); + + match materialized.materialization { + VariableMaterialization::DirectValue { + value: PlannedValue::Constant(42), + } => {} + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_converts_register_direct_value() { + let plan = read_plan(VariableLocation::RegisterValue { dwarf_reg: 6 }); + let materialized = plan.materialization_plan(&capabilities(false)); + + match materialized.materialization { + VariableMaterialization::DirectValue { + value: PlannedValue::RegisterValue { dwarf_reg: 6 }, + } => {} + other => panic!("unexpected materialization: {other:?}"), + } + } + #[test] fn materialization_plan_surfaces_piece_locations_without_first_piece_fallback() { let plan = read_plan(VariableLocation::Pieces(vec![PieceLocation {