diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index 59933111..a4722d13 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -29,7 +29,7 @@ struct PrintVarRuntimeMeta { #[derive(Debug, Clone)] enum ComplexArgSource<'ctx> { RuntimeRead { - location: ghostscope_dwarf::VariableLocation, + address: ghostscope_dwarf::PlannedAddress, dwarf_type: ghostscope_dwarf::TypeInfo, module_for_offsets: Option, }, @@ -48,7 +48,7 @@ enum ComplexArgSource<'ctx> { bytes: Vec, }, AddressValue { - location: ghostscope_dwarf::VariableLocation, + address: ghostscope_dwarf::PlannedAddress, module_for_offsets: Option, }, // Newly added: a value computed in LLVM at runtime (e.g., expression result) @@ -172,6 +172,99 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } + fn complex_arg_from_dwarf_read_plan( + &mut self, + plan: ghostscope_dwarf::VariableReadPlan, + display_name: Option, + ) -> Result> { + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(plan, pc_address)?; + let display_name = display_name.unwrap_or_else(|| materialized.name.clone()); + + match &materialized.materialization { + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability: ghostscope_dwarf::Availability::OptimizedOut, + } => { + let optimized_type = ghostscope_dwarf::TypeInfo::OptimizedOut { + name: materialized.name.clone(), + }; + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(optimized_type), + access_path: Vec::new(), + data_len: 0, + source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, + }) + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + availability, + pc_address, + )) + } + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let data_len = Self::compute_read_size_for_type(&dwarf_type); + if data_len == 0 { + return Err(CodeGenError::TypeSizeNotAvailable(display_name)); + } + let module_hint = self.take_module_hint(); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type.clone()), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::RuntimeRead { + address: address.clone(), + dwarf_type, + module_for_offsets: module_hint, + }, + }) + } + ghostscope_dwarf::VariableMaterialization::DirectValue { .. } => { + let value = + self.variable_materialization_to_llvm_value(&materialized, pc_address, None)?; + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let value = match value { + BasicValueEnum::IntValue(value) => value, + BasicValueEnum::PointerValue(value) => self + .builder + .build_ptr_to_int(value, self.context.i64_type(), "direct_ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string()))?, + _ => { + return Err(CodeGenError::DwarfError(format!( + "direct DWARF value '{}' did not lower to an integer", + materialized.name + ))) + } + }; + let data_len = Self::compute_read_size_for_type(&dwarf_type).clamp(1, 8); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::ComputedInt { value, byte_len: data_len }, + }) + } + ghostscope_dwarf::VariableMaterialization::Composite { .. } => Err( + CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + materialized.name + )), + ), + } + } + /// Unified expression resolver: returns a ComplexArg carrying /// a consistent var_name_index/type_index/access_path/data_len/source /// with strict priority: script variables -> DWARF (locals/params/globals). @@ -324,13 +417,33 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { let var = self .query_dwarf_for_complex_expr(inner)? .ok_or_else(|| CodeGenError::VariableNotFound(format!("{inner:?}")))?; - let inner_ty = var.dwarf_type.as_ref().ok_or_else(|| { + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(var, pc_address)?; + let inner_ty = materialized.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) })?; let ptr_ty = ghostscope_dwarf::TypeInfo::PointerType { target_type: Box::new(inner_ty.clone()), size: 8, }; + let address = match materialized.materialization { + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + address + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + return Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + &availability, + pc_address, + )) + } + _ => { + return Err(CodeGenError::DwarfError(format!( + "cannot take address of value-backed DWARF expression '{}'", + materialized.name + ))) + } + }; let module_hint = self.take_module_hint(); Ok(ComplexArg { var_name_index: self @@ -340,7 +453,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - location: var.location.clone(), + address, module_for_offsets: module_hint, }, }) @@ -351,208 +464,21 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { | E::ArrayAccess(_, _) | E::PointerDeref(_) | E::ChainAccess(_)) => { - if let Some(plan) = self.query_dwarf_for_complex_expr_plan(expr)? { - let pc_address = self.get_compile_time_context()?.pc_address; - let (var_name, dwarf_type, location) = - self.variable_read_plan_to_runtime_read_parts(plan, pc_address)?; - let display_name = if matches!(expr, E::PointerDeref(_)) { - self.expr_to_name(expr) - } else { - var_name - }; - if matches!(location, ghostscope_dwarf::VariableLocation::OptimizedOut) { - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let data_len = Self::compute_read_size_for_type(&dwarf_type); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(display_name)); - } - let module_hint = self.take_module_hint(); - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location, - dwarf_type, - module_for_offsets: module_hint, - }, - }); - } - - let var = self - .query_dwarf_for_complex_expr(expr)? + let plan = self + .query_dwarf_for_complex_expr_plan(expr)? .ok_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))?; - if var.availability == ghostscope_dwarf::Availability::OptimizedOut { - let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { - name: var.name.clone(), - }; - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(var.name.clone()), - type_index: self.trace_context.add_type(ti), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let dwarf_type = var.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })?; - let data_len = Self::compute_read_size_for_type(dwarf_type); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(var.name)); - } - // Previously clamped to 1993 bytes; now use full DWARF size (transport clamps per event size) - // data_len unchanged - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(var.name.clone()), - type_index: self.trace_context.add_type(dwarf_type.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: var.location.clone(), - dwarf_type: dwarf_type.clone(), - module_for_offsets: module_hint, - }, - }) + let display_name = if matches!(expr, E::PointerDeref(_)) { + Some(self.expr_to_name(expr)) + } else { + None + }; + self.complex_arg_from_dwarf_read_plan(plan, display_name) } // 6) Variable not in script scope → DWARF variable or computed fast-path for simple scalars E::Variable(name) => { if let Some(v) = self.query_dwarf_for_variable(name)? { - if let Some(ref t) = v.dwarf_type { - // If DWARF reports optimized-out at this PC, emit OptimizedOut type with no data - if v.availability == ghostscope_dwarf::Availability::OptimizedOut { - let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { - name: v.name.clone(), - }; - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(ti), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let is_link_addr = - matches!(v.location, ghostscope_dwarf::VariableLocation::Address(_)); - if Self::is_simple_typeinfo(t) && !is_link_addr { - // Prefer computed value to avoid runtime reads - let compiled = self.compile_expr(expr)?; - match compiled { - BasicValueEnum::IntValue(iv) => { - // Respect DWARF pointer types to keep pointer formatting - let (kind, byte_len) = if matches!( - t, - ghostscope_dwarf::TypeInfo::PointerType { .. } - ) { - (TypeKind::Pointer, 8) - } else { - let bitw = iv.get_type().get_bit_width(); - if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - } - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len, - }, - }) - } - BasicValueEnum::PointerValue(pv) => { - // Pointer register-backed → cast to i64 with pointer typeindex - let iv = self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string()))?; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self - .add_synthesized_type_index_for_kind(TypeKind::Pointer), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len: 8, - }, - }) - } - _ => { - // Fall back to runtime read path - let data_len = Self::compute_read_size_for_type(t); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(v.name)); - } - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(t.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: v.location.clone(), - dwarf_type: t.clone(), - module_for_offsets: module_hint, - }, - }) - } - } - } else { - // Complex types or link-time addresses: use RuntimeRead - // (globals/statics need memory read; not an address print unless AddressOf) - let data_len = Self::compute_read_size_for_type(t); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(v.name)); - } - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(t.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: v.location.clone(), - dwarf_type: t.clone(), - module_for_offsets: module_hint, - }, - }) - } - } else { - Err(CodeGenError::DwarfError( - "Variable has no DWARF type information".to_string(), - )) - } + self.complex_arg_from_dwarf_read_plan(v, None) } else { Err(CodeGenError::VariableNotInScope(name.clone())) } @@ -611,6 +537,13 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { let index = sign * int_side; let (location, elem_ty) = self.compute_pointed_location_with_index(ptr_side, index)?; + let address = ghostscope_dwarf::PlannedAddress::from_location(location) + .ok_or_else(|| { + CodeGenError::DwarfError( + "pointer arithmetic did not produce an address-backed plan" + .to_string(), + ) + })?; let data_len = Self::compute_read_size_for_type(&elem_ty); let module_hint = self.take_module_hint(); if data_len == 0 { @@ -627,7 +560,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - location, + address, module_for_offsets: module_hint, }, }); @@ -640,7 +573,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - location, + address, dwarf_type: elem_ty, module_for_offsets: module_hint, }, @@ -732,7 +665,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(1) } ComplexArgSource::RuntimeRead { - location, + address, ref dwarf_type, module_for_offsets, } => { @@ -744,7 +677,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { }; self.generate_print_complex_variable_runtime( meta, - &location, + &address, dwarf_type, module_for_offsets.as_deref(), )?; @@ -1104,28 +1037,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Already accumulated; EndInstruction will send the whole event Ok(()) } - /// Determine if a TypeInfo qualifies as a "simple variable" for PrintVariableIndex - /// Simple: base types (bool/int/float/char), enums (with base type 1/2/4/8), pointers; - /// Complex: arrays, structs, unions, functions - fn is_simple_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { - use ghostscope_dwarf::TypeInfo as TI; - match t { - TI::BaseType { size, .. } => matches!(*size, 1 | 2 | 4 | 8), - TI::EnumType { base_type, .. } => { - let sz = base_type.size(); - matches!(sz, 1 | 2 | 4 | 8) - } - TI::PointerType { .. } => true, - TI::TypedefType { - underlying_type, .. - } - | TI::QualifiedType { - underlying_type, .. - } => Self::is_simple_typeinfo(underlying_type), - _ => false, - } - } - fn is_char_byte_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { use ghostscope_dwarf::TypeInfo as TI; match t { @@ -3302,7 +3213,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } ComplexArgSource::RuntimeRead { - location, + address, dwarf_type, module_for_offsets, } => { @@ -3315,9 +3226,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { .build_bit_cast(var_data_ptr, ptr_type, "dst_ptr") .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; let size_val = i32_type.const_int(a.data_len as u64, false); - // Compute source address; if link-time address, apply ASLR offsets via map - let src_addr = self.variable_location_to_address_with_hint( - location, + let src_addr = self.planned_address_to_llvm_address( + address, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -3479,12 +3389,11 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { self.builder.position_at_end(cont2_block); } ComplexArgSource::AddressValue { - location, + address, module_for_offsets, } => { - // Compute address (apply ASLR if link-time address) and store as 8 bytes - let addr = self.variable_location_to_address_with_hint( - location, + let addr = self.planned_address_to_llvm_address( + address, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -4389,7 +4298,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { fn generate_print_complex_variable_runtime( &mut self, meta: PrintVarRuntimeMeta, - location: &ghostscope_dwarf::VariableLocation, + address: &ghostscope_dwarf::PlannedAddress, dwarf_type: &ghostscope_dwarf::TypeInfo, module_hint: Option<&str>, ) -> Result<()> { @@ -4399,7 +4308,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path = %meta.access_path, type_size = dwarf_type.size(), data_len_limit = meta.data_len_limit, - location = ?location, + address = ?address, "generate_print_complex_variable_runtime: begin" ); // Compute sizes first, then reserve instruction region directly in accumulation buffer @@ -4716,7 +4625,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Compute source address with ASLR-aware helper, honoring module hint // Prefer a previously recorded module path for offsets; fall back handled in helper let src_addr = - self.variable_location_to_address_with_hint(location, Some(status_ptr), module_hint)?; + self.planned_address_to_llvm_address(address, Some(status_ptr), module_hint)?; tracing::trace!(src_addr = %{src_addr}, "generate_print_complex_variable_runtime: computed src_addr"); // Setup common types and casts diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index dac7623c..d9e950fd 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -5,8 +5,9 @@ use super::context::{CodeGenError, EbpfContext, Result}; use ghostscope_dwarf::{ - AddressExpr, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, SectionType, - TypeInfo, VariableAccessPath, VariableAccessSegment, VariableLocation, VariableReadPlan, + AddressExpr, AddressOrigin, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, + PlannedAddress, SectionType, TypeInfo, VariableAccessPath, VariableAccessSegment, + VariableLocation, VariableMaterializationPlan, VariableReadPlan, }; use ghostscope_process::module_probe; use inkwell::values::{BasicValueEnum, IntValue, PointerValue}; @@ -135,17 +136,9 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { var_name, pieces.len() ); - if let Some(first_piece) = pieces.first() { - self.variable_location_to_llvm_value( - &first_piece.location, - dwarf_type, - var_name, - pc_address, - status_ptr, - ) - } else { - Ok(self.context.i64_type().const_zero().into()) - } + Err(CodeGenError::DwarfError(format!( + "DWARF variable '{var_name}' is split across pieces; piece reconstruction is not implemented" + ))) } VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( "Frame-base-relative variable plan requires resolved frame base".to_string(), @@ -163,51 +156,99 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { status_ptr: Option>, module_hint: Option<&str>, ) -> Result> { - // Policy note: - // - Link-time addresses (DW_OP_addr or constant-foldable address expressions) are - // always rebased using per-module section offsets (ASLR) to get a runtime address. - // - Runtime-derived addresses (register/stack-relative or computed via dereference) - // are used as-is and are NOT rebased. - // The caller signals which path we are on by providing the semantic location shape. + let Some(address) = PlannedAddress::from_location(location.clone()) else { + return match location { + VariableLocation::OptimizedOut => { + let pc_address = self + .current_compile_time_context + .as_ref() + .map(|ctx| ctx.pc_address) + .unwrap_or(0); + Err(Self::dwarf_expression_unavailable_error( + "DWARF address expression", + &Availability::OptimizedOut, + pc_address, + )) + } + _ => Err(CodeGenError::NotImplemented( + "Unable to compute address from variable location".to_string(), + )), + }; + }; + + self.planned_address_to_llvm_address(&address, status_ptr, module_hint) + } + + pub fn planned_address_to_llvm_address( + &mut self, + address: &PlannedAddress, + status_ptr: Option>, + module_hint: Option<&str>, + ) -> Result> { let pt_regs_ptr = self.get_pt_regs_parameter()?; self.store_offsets_found_const(true)?; - match location { - VariableLocation::OptimizedOut => { - let pc_address = self - .current_compile_time_context - .as_ref() - .map(|ctx| ctx.pc_address) - .unwrap_or(0); - Err(Self::dwarf_expression_unavailable_error( - "DWARF address expression", - &Availability::OptimizedOut, - pc_address, - )) + match address.origin { + AddressOrigin::LinkTime => { + let link_addr = address.constant_link_time_address().ok_or_else(|| { + CodeGenError::DwarfError( + "read plan marked address as link-time without a constant address" + .to_string(), + ) + })?; + self.runtime_address_from_link_time_address(link_addr, status_ptr, module_hint) } - VariableLocation::Address(expr) => self.address_steps_to_address_with_hint( - &expr.steps, - pt_regs_ptr, - status_ptr, - module_hint, - ), - VariableLocation::AbsoluteAddressValue(expr) => self - .address_steps_to_address_with_hint( - &expr.steps, - pt_regs_ptr, + AddressOrigin::LinkTimeBase => { + let (link_addr, tail_steps) = + address.link_time_base_and_runtime_tail().ok_or_else(|| { + CodeGenError::DwarfError( + "read plan marked address as link-time-base without a base address" + .to_string(), + ) + })?; + let runtime_base = self.runtime_address_from_link_time_address( + link_addr, status_ptr, module_hint, - ), + )?; + let value = self.generate_compute_steps( + tail_steps, + pt_regs_ptr, + None, + status_ptr, + Some(runtime_base), + )?; + match value { + BasicValueEnum::IntValue(value) => Ok(value), + _ => Err(CodeGenError::LLVMError( + "Computed address did not produce integer".to_string(), + )), + } + } + AddressOrigin::RuntimeDerived | AddressOrigin::Unknown => { + self.planned_address_without_rebase(address, pt_regs_ptr, status_ptr) + } + } + } + + fn planned_address_without_rebase( + &mut self, + address: &PlannedAddress, + pt_regs_ptr: PointerValue<'ctx>, + status_ptr: Option>, + ) -> Result> { + match &address.location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + self.address_steps_to_unrebased_address(&expr.steps, pt_regs_ptr, status_ptr) + } VariableLocation::RegisterAddress { dwarf_reg, offset } => { let reg_val = self.load_register_value(*dwarf_reg, pt_regs_ptr)?; if let BasicValueEnum::IntValue(reg_i) = reg_val { if *offset != 0 { let ofs_val = self.context.i64_type().const_int(*offset as u64, true); - let sum = self - .builder + self.builder .build_int_add(reg_i, ofs_val, "addr_with_offset") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - Ok(sum) + .map_err(|e| CodeGenError::LLVMError(e.to_string())) } else { Ok(reg_i) } @@ -218,96 +259,29 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } VariableLocation::ComputedAddress(steps) => { - self.address_steps_to_address_with_hint(steps, pt_regs_ptr, status_ptr, module_hint) + self.address_steps_to_unrebased_address(steps, pt_regs_ptr, status_ptr) } _ => Err(CodeGenError::NotImplemented( - "Unable to compute address from variable location".to_string(), + "Unable to compute address from planned address".to_string(), )), } } - fn address_steps_to_address_with_hint( + fn address_steps_to_unrebased_address( &mut self, steps: &[ComputeStep], pt_regs_ptr: PointerValue<'ctx>, status_ptr: Option>, - module_hint: Option<&str>, ) -> Result> { - if let Some(link_addr) = Self::fold_constant_address_steps(steps) { - return self.runtime_address_from_link_time_address(link_addr, status_ptr, module_hint); - } - - // If a static base is dereferenced before any register dependency, rebase - // the base first and then execute the remaining runtime expression. - if let Some(ComputeStep::PushConstant(base_const)) = steps.first() { - let mut saw_reg = false; - let mut saw_deref = false; - for step in &steps[1..] { - match step { - ComputeStep::LoadRegister(_) => { - saw_reg = true; - break; - } - ComputeStep::Dereference { .. } => { - saw_deref = true; - break; - } - _ => {} - } - } - - if saw_deref && !saw_reg { - let rt = self.runtime_address_from_link_time_address( - *base_const as u64, - status_ptr, - module_hint, - )?; - let val = self.generate_compute_steps( - &steps[1..], - pt_regs_ptr, - None, - status_ptr, - Some(rt), - )?; - return match val { - BasicValueEnum::IntValue(value) => Ok(value), - _ => Err(CodeGenError::LLVMError( - "Computed location did not produce integer".to_string(), - )), - }; - } - } - let val = self.generate_compute_steps(steps, pt_regs_ptr, None, status_ptr, None)?; match val { BasicValueEnum::IntValue(value) => Ok(value), _ => Err(CodeGenError::LLVMError( - "Computed location did not produce integer".to_string(), + "Computed address did not produce integer".to_string(), )), } } - fn fold_constant_address_steps(steps: &[ComputeStep]) -> Option { - let mut const_stack: Vec = Vec::new(); - for step in steps { - match step { - ComputeStep::PushConstant(value) => const_stack.push(*value), - ComputeStep::Add => { - let b = const_stack.pop()?; - let a = const_stack.pop()?; - const_stack.push(a.saturating_add(b)); - } - _ => return None, - } - } - - if const_stack.len() == 1 && const_stack[0] >= 0 { - Some(const_stack[0] as u64) - } else { - None - } - } - fn runtime_address_from_link_time_address( &mut self, link_addr: u64, @@ -447,33 +421,74 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(extended.into()) } - pub(super) fn variable_read_plan_to_runtime_read_parts( + pub(super) fn variable_read_plan_to_materialization( &self, plan: VariableReadPlan, pc_address: u64, - ) -> Result<(String, TypeInfo, VariableLocation)> { - let lowering = plan.bpf_lowering_plan(&self.compile_options.runtime_capabilities); - if !lowering.availability.is_available() - && lowering.availability != Availability::OptimizedOut + ) -> Result { + let materialization = plan.materialization_plan(&self.compile_options.runtime_capabilities); + if !materialization.availability.is_available() + && materialization.availability != Availability::OptimizedOut { return Err(Self::dwarf_expression_unavailable_error( - &plan.name, - &lowering.availability, + &materialization.name, + &materialization.availability, pc_address, )); } - let dwarf_type = if lowering.availability == Availability::OptimizedOut { - TypeInfo::OptimizedOut { - name: plan.name.clone(), - } - } else { - plan.dwarf_type.clone().ok_or_else(|| { + if materialization.availability != Availability::OptimizedOut { + materialization.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })? - }; + })?; + } + + Ok(materialization) + } - Ok((plan.name, dwarf_type, plan.location)) + pub fn variable_materialization_to_llvm_value( + &mut self, + materialization: &VariableMaterializationPlan, + pc_address: u64, + status_ptr: Option>, + ) -> Result> { + match &materialization.materialization { + ghostscope_dwarf::VariableMaterialization::DirectValue { location, .. } => { + let dwarf_type = materialization.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + self.variable_location_to_llvm_value( + location, + dwarf_type, + &materialization.name, + pc_address, + status_ptr, + ) + } + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + let dwarf_type = materialization.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + self.generate_memory_location_from_planned_address(address, dwarf_type, status_ptr) + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + Err(Self::dwarf_expression_unavailable_error( + &materialization.name, + availability, + pc_address, + )) + } + ghostscope_dwarf::VariableMaterialization::Composite { .. } => { + Err(CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + materialization.name + ))) + } + } } /// Generate LLVM IR for memory-backed variable locations. @@ -482,6 +497,20 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { location: &VariableLocation, dwarf_type: &TypeInfo, status_ptr: Option>, + ) -> Result> { + let address = PlannedAddress::from_location(location.clone()).ok_or_else(|| { + CodeGenError::DwarfError( + "Variable location cannot be materialized as an address".into(), + ) + })?; + self.generate_memory_location_from_planned_address(&address, dwarf_type, status_ptr) + } + + fn generate_memory_location_from_planned_address( + &mut self, + address: &PlannedAddress, + dwarf_type: &TypeInfo, + status_ptr: Option>, ) -> Result> { let module_hint = self.current_resolved_var_module_path.clone(); let runtime_status_ptr = if self.condition_context_active { @@ -489,8 +518,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { status_ptr }; - let addr = self.variable_location_to_address_with_hint( - location, + let addr = self.planned_address_to_llvm_address( + address, runtime_status_ptr, module_hint.as_deref(), )?; @@ -1814,6 +1843,32 @@ mod tests { assert!(err.to_string().contains("0x1234")); } + #[test] + fn piece_locations_are_rejected_instead_of_using_first_piece() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "piece_value", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + + let ty = ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + let location = VariableLocation::Pieces(vec![ghostscope_dwarf::PieceLocation { + bit_offset: 0, + bit_size: 32, + location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), + }]); + + let err = ctx + .variable_location_to_llvm_value(&location, &ty, "split", 0x1234, None) + .expect_err("split pieces should not silently use the first piece"); + + assert!(matches!(err, CodeGenError::DwarfError(_))); + assert!(err.to_string().contains("split across pieces")); + } + #[test] fn unavailable_error_formats_structured_dwarf_reason() { let err = EbpfContext::dwarf_expression_unavailable_error( @@ -1859,6 +1914,7 @@ mod tests { let plan = VariableReadPlan { name: "x".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: Some(dwarf_type), declaration: None, type_id: None, @@ -1873,7 +1929,7 @@ mod tests { }; let err = ctx - .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + .variable_read_plan_to_materialization(plan, 0x1234) .expect_err("zero stack capability should reject the read plan"); assert!(matches!(err, CodeGenError::VariableUnavailable(_))); @@ -1893,6 +1949,7 @@ mod tests { let plan = VariableReadPlan { name: "x".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: Some(dwarf_type), declaration: None, type_id: None, @@ -1906,16 +1963,16 @@ mod tests { provenance: Provenance::DirectDie, }; - let (_, marker_type, location) = ctx - .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + let materialized = ctx + .variable_read_plan_to_materialization(plan, 0x1234) .expect("optimized-out runtime metadata should remain printable"); - assert_eq!(location, VariableLocation::OptimizedOut); - assert_eq!( - marker_type, - TypeInfo::OptimizedOut { - name: "x".to_string() + assert_eq!(materialized.availability, Availability::OptimizedOut); + assert!(matches!( + materialized.materialization, + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability: Availability::OptimizedOut } - ); + )); } #[test] diff --git a/ghostscope-compiler/src/ebpf/expression.rs b/ghostscope-compiler/src/ebpf/expression.rs index 82ed9fbc..93ab74e4 100644 --- a/ghostscope-compiler/src/ebpf/expression.rs +++ b/ghostscope-compiler/src/ebpf/expression.rs @@ -7,7 +7,6 @@ use crate::script::{BinaryOp, Expr}; use aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user; use ghostscope_dwarf::{ AmbiguityReason, Availability, RuntimeRequirement, TypeInfo as DwarfType, UnsupportedReason, - VariableReadPlan, }; use inkwell::values::{BasicValueEnum, IntValue}; use inkwell::AddressSpace; @@ -2189,24 +2188,18 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } }; - let dwarf_type = variable_plan.dwarf_type.as_ref().ok_or_else(|| { + let materialized = + self.variable_read_plan_to_materialization(variable_plan, compile_context.pc_address)?; + let dwarf_type = materialized.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) })?; - Self::ensure_dwarf_value_available(&variable_plan, compile_context.pc_address)?; debug!( "compile_dwarf_expression: Found DWARF info for expression '{}' with type: {:?}", - variable_plan.name, dwarf_type + materialized.name, dwarf_type ); - // Use the unified evaluation logic to generate LLVM IR - self.variable_location_to_llvm_value( - &variable_plan.location, - dwarf_type, - &variable_plan.name, - compile_context.pc_address, - None, - ) + self.variable_materialization_to_llvm_value(&materialized, compile_context.pc_address, None) } pub(crate) fn dwarf_expression_unavailable_error( @@ -2287,22 +2280,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - pub(crate) fn ensure_dwarf_value_available( - variable: &VariableReadPlan, - pc_address: u64, - ) -> Result<()> { - let availability = variable.availability.clone(); - if availability.is_available() { - Ok(()) - } else { - Err(Self::dwarf_expression_unavailable_error( - &variable.name, - &availability, - pc_address, - )) - } - } - /// Helper: Convert expression to string for debugging fn expr_to_debug_string(expr: &crate::script::Expr) -> String { use crate::script::Expr; diff --git a/ghostscope-dwarf/src/analyzer/mod.rs b/ghostscope-dwarf/src/analyzer/mod.rs index 3af5ab40..9d51678c 100644 --- a/ghostscope-dwarf/src/analyzer/mod.rs +++ b/ghostscope-dwarf/src/analyzer/mod.rs @@ -1068,6 +1068,7 @@ mod tests { VariableReadPlan { name: name.to_string(), type_name: "int".to_string(), + access_path: crate::VariableAccessPath::default(), dwarf_type: Some(crate::TypeInfo::BaseType { name: "int".to_string(), size: 4, diff --git a/ghostscope-dwarf/src/analyzer/plan_pc.rs b/ghostscope-dwarf/src/analyzer/plan_pc.rs index b99c536f..ba5517d6 100644 --- a/ghostscope-dwarf/src/analyzer/plan_pc.rs +++ b/ghostscope-dwarf/src/analyzer/plan_pc.rs @@ -2,7 +2,7 @@ use super::DwarfAnalyzer; use crate::{ core::{ModuleAddress, Provenance, Result}, semantics::{ - AddressSpaceInfo, PcContext, PcLineInfo, PlanError, VariableAccessPath, + AddressSpaceInfo, PcContext, PcLineInfo, PcRange, PlanError, VariableAccessPath, VariableAccessSegment, VariableReadPlan, VisibleVariable, VisibleVariablesResult, }, }; @@ -135,6 +135,15 @@ impl DwarfAnalyzer { VariableReadPlan::from_visible_variable(variable.visible_variable(), provenance) } + fn attach_pc_context(ctx: &PcContext, mut plan: VariableReadPlan) -> VariableReadPlan { + plan.pc_range = Some(PcRange { + start: ctx.normalized_pc, + end: ctx.normalized_pc, + }); + plan.inline_context = ctx.inline_chain.last().and_then(|frame| frame.context); + plan + } + pub(super) fn plan_access_path_with_type_completion( &self, module_path: &Path, @@ -178,7 +187,10 @@ impl DwarfAnalyzer { ) .map(|variable| { variable.map(|variable| { - VariableReadPlan::from_visible_variable(variable, Provenance::DirectDie) + Self::attach_pc_context( + ctx, + VariableReadPlan::from_visible_variable(variable, Provenance::DirectDie), + ) }) }) } @@ -293,9 +305,9 @@ impl DwarfAnalyzer { match matches.as_slice() { [] => Ok(None), - [variable] => Ok(Some(VariableReadPlan::from_visible_variable( - variable.clone(), - Provenance::DirectDie, + [variable] => Ok(Some(Self::attach_pc_context( + ctx, + VariableReadPlan::from_visible_variable(variable.clone(), Provenance::DirectDie), ))), _ => Err(anyhow::anyhow!( "Ambiguous VariableId {:?} at PC 0x{:x}: {} visible matches", diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index 9f2698b6..4b6bbd57 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -28,18 +28,19 @@ pub use analyzer::{ pub use core::{ AddressExpr, AmbiguityReason, Availability, CallerFrameRecovery, CfaResult, ComputeStep, CuId, DieRef, DwarfError, EntryValueCase, FunctionId, FunctionInfo, GlobalVariableInfo, HelperMode, - InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, Provenance, Result, + InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, PieceLocation, Provenance, Result, RuntimeCapabilities, RuntimeRequirement, ScopeId, SectionType, SourceLocation, TargetArch, TypeId, UnsupportedReason, VariableId, VariableInfo, VariableLocation, VerifierRisk, }; // Re-export semantic contract types. pub use semantics::{ - AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, CompactUnwindTable, - InlineFrame, PcContext, PcLineInfo, PcRange, RegisterRecoveryPlan, UnwindDiagnostic, - UnwindDiagnosticKind, VariableAccessPath, VariableAccessSegment, VariableLoweringKind, - VariableLoweringPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, VisibleVariable, - VisibleVariablesResult, + AddressOrigin, AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, + CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, + RegisterRecoveryPlan, UnwindDiagnostic, UnwindDiagnosticKind, VariableAccessPath, + VariableAccessSegment, VariableLoweringKind, VariableLoweringPlan, VariableMaterialization, + VariableMaterializationPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, + VisibleVariable, VisibleVariablesResult, }; // Re-export type definitions from protocol (avoiding circular dependencies) diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs index 5a2405ad..9080c194 100644 --- a/ghostscope-dwarf/src/semantics/variable_plan.rs +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -2,8 +2,8 @@ use crate::core::{ AddressExpr, Availability, ComputeStep, DieRef, HelperMode, InlineContextId, MemoryAccessSize, - Provenance, Result, RuntimeCapabilities, RuntimeRequirement, TypeId, UnsupportedReason, - VariableId, VariableLocation, VerifierRisk, + PieceLocation, Provenance, Result, RuntimeCapabilities, RuntimeRequirement, TypeId, + UnsupportedReason, VariableId, VariableLocation, VerifierRisk, }; use crate::semantics::PcRange; use crate::TypeInfo; @@ -59,11 +59,54 @@ pub struct VariableLoweringPlan { pub verifier_risk: VerifierRisk, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AddressOrigin { + LinkTime, + LinkTimeBase, + RuntimeDerived, + Unknown, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PlannedAddress { + pub location: VariableLocation, + pub origin: AddressOrigin, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum VariableMaterialization { + DirectValue { + location: VariableLocation, + address_origin: Option, + }, + UserMemoryRead { + address: PlannedAddress, + }, + Composite { + pieces: Vec, + }, + Unavailable { + availability: Availability, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VariableMaterializationPlan { + pub name: String, + pub type_name: String, + pub access_path: VariableAccessPath, + pub dwarf_type: Option, + pub availability: Availability, + pub lowering: VariableLoweringPlan, + pub materialization: VariableMaterialization, +} + /// Owned, PC-sensitive variable read plan before runtime-specific lowering. #[derive(Debug, Clone, PartialEq)] pub struct VariableReadPlan { pub name: String, pub type_name: String, + pub access_path: VariableAccessPath, pub dwarf_type: Option, pub declaration: Option, pub type_id: Option, @@ -77,7 +120,7 @@ pub struct VariableReadPlan { pub provenance: Provenance, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct VariableAccessPath { pub segments: Vec, } @@ -165,6 +208,7 @@ impl VariableReadPlan { Self { name: variable.name, type_name: variable.type_name, + access_path: VariableAccessPath::default(), dwarf_type: variable.dwarf_type, declaration: variable.declaration, type_id: variable.type_id, @@ -233,12 +277,72 @@ impl VariableReadPlan { } } + pub fn materialization_plan( + &self, + capabilities: &RuntimeCapabilities, + ) -> VariableMaterializationPlan { + let lowering = self.bpf_lowering_plan(capabilities); + let materialization = if !lowering.availability.is_available() { + VariableMaterialization::Unavailable { + availability: lowering.availability.clone(), + } + } else { + match lowering.kind { + VariableLoweringKind::DirectValue => VariableMaterialization::DirectValue { + address_origin: direct_value_address_origin(&self.location), + location: self.location.clone(), + }, + VariableLoweringKind::UserMemoryRead => { + match PlannedAddress::from_location(self.location.clone()) { + Some(address) => VariableMaterialization::UserMemoryRead { address }, + None => VariableMaterialization::Unavailable { + availability: Availability::Unsupported( + UnsupportedReason::AddressClass { + detail: format!( + "location {} cannot be materialized as an address", + self.location + ), + }, + ), + }, + } + } + VariableLoweringKind::Composite => match &self.location { + VariableLocation::Pieces(pieces) => VariableMaterialization::Composite { + pieces: pieces.clone(), + }, + _ => VariableMaterialization::Unavailable { + availability: Availability::Unsupported( + UnsupportedReason::ExpressionShape { + detail: "composite lowering without piece locations".to_string(), + }, + ), + }, + }, + VariableLoweringKind::Unavailable => VariableMaterialization::Unavailable { + availability: lowering.availability.clone(), + }, + } + }; + + VariableMaterializationPlan { + name: self.name.clone(), + type_name: self.type_name.clone(), + access_path: self.access_path.clone(), + dwarf_type: self.dwarf_type.clone(), + availability: lowering.availability.clone(), + lowering, + materialization, + } + } + pub fn plan_access_path(&self, path: &VariableAccessPath) -> Result { let mut plan = self.clone(); for segment in &path.segments { plan = plan.plan_access_segment(segment)?; } + plan.access_path.segments.extend(path.segments.clone()); plan.name.push_str(&path.suffix()); Ok(plan) } @@ -346,6 +450,54 @@ impl VariableReadPlan { } } +impl PlannedAddress { + pub fn from_location(location: VariableLocation) -> Option { + let origin = match &location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + address_origin_for_steps(&expr.steps) + } + VariableLocation::RegisterAddress { .. } + | VariableLocation::FrameBaseRelative { .. } => AddressOrigin::RuntimeDerived, + VariableLocation::ComputedAddress(steps) => address_origin_for_steps(steps), + VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) + | VariableLocation::Pieces(_) + | VariableLocation::OptimizedOut + | VariableLocation::Unknown => return None, + }; + + Some(Self { location, origin }) + } + + pub fn constant_link_time_address(&self) -> Option { + match (&self.origin, &self.location) { + (AddressOrigin::LinkTime, VariableLocation::Address(expr)) + | (AddressOrigin::LinkTime, VariableLocation::AbsoluteAddressValue(expr)) => { + fold_constant_steps(&expr.steps) + } + (AddressOrigin::LinkTime, VariableLocation::ComputedAddress(steps)) => { + fold_constant_steps(steps) + } + _ => None, + } + } + + pub fn link_time_base_and_runtime_tail(&self) -> Option<(u64, &[ComputeStep])> { + if self.origin != AddressOrigin::LinkTimeBase { + return None; + } + + match &self.location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + link_time_base_and_runtime_tail(&expr.steps) + } + VariableLocation::ComputedAddress(steps) => link_time_base_and_runtime_tail(steps), + _ => None, + } + } +} + impl RuntimeCapabilities { pub fn supports_requirement(&self, requirement: &RuntimeRequirement) -> bool { match requirement { @@ -360,6 +512,89 @@ impl RuntimeCapabilities { } } +fn direct_value_address_origin(location: &VariableLocation) -> Option { + match location { + VariableLocation::AbsoluteAddressValue(expr) => Some(address_origin_for_steps(&expr.steps)), + _ => None, + } +} + +fn address_origin_for_steps(steps: &[ComputeStep]) -> AddressOrigin { + if fold_constant_steps(steps).is_some() { + return AddressOrigin::LinkTime; + } + + if link_time_base_and_runtime_tail(steps).is_some() { + return AddressOrigin::LinkTimeBase; + } + + if steps_reference_runtime_state(steps) { + AddressOrigin::RuntimeDerived + } else { + AddressOrigin::Unknown + } +} + +fn fold_constant_steps(steps: &[ComputeStep]) -> Option { + let mut const_stack: Vec = Vec::new(); + for step in steps { + match step { + ComputeStep::PushConstant(value) => const_stack.push(*value), + ComputeStep::Add => { + let rhs = const_stack.pop()?; + let lhs = const_stack.pop()?; + const_stack.push(lhs.saturating_add(rhs)); + } + _ => return None, + } + } + + if const_stack.len() == 1 && const_stack[0] >= 0 { + Some(const_stack[0] as u64) + } else { + None + } +} + +fn link_time_base_and_runtime_tail(steps: &[ComputeStep]) -> Option<(u64, &[ComputeStep])> { + let Some(ComputeStep::PushConstant(base)) = steps.first() else { + return None; + }; + + if *base < 0 { + return None; + } + + for step in steps.iter().skip(1) { + match step { + ComputeStep::LoadRegister(_) => { + break; + } + ComputeStep::Dereference { .. } => { + return Some((*base as u64, &steps[1..])); + } + _ => {} + } + } + + None +} + +fn steps_reference_runtime_state(steps: &[ComputeStep]) -> bool { + steps.iter().any(|step| match step { + ComputeStep::LoadRegister(_) + | ComputeStep::Dereference { .. } + | ComputeStep::EntryValueLookup { .. } => true, + ComputeStep::If { + then_branch, + else_branch, + } => { + steps_reference_runtime_state(then_branch) || steps_reference_runtime_state(else_branch) + } + _ => false, + }) +} + trait VariableLocationLoweringExt { fn lowering_kind(&self) -> VariableLoweringKind; fn runtime_requirements(&self) -> Vec; @@ -762,6 +997,7 @@ mod tests { VariableReadPlan { name: "value".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: None, declaration: None, type_id: None, @@ -822,6 +1058,118 @@ mod tests { assert!(lowering.required_registers.is_empty()); } + #[test] + fn materialization_plan_preserves_link_time_address_origin() { + let plan = read_plan(VariableLocation::Address(AddressExpr::constant(0x1000))); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTime); + assert_eq!(address.constant_link_time_address(), Some(0x1000)); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_marks_static_base_before_deref() { + let plan = read_plan(VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x3000), + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ComputeStep::PushConstant(16), + ComputeStep::Add, + ])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTimeBase); + let (base, tail) = address + .link_time_base_and_runtime_tail() + .expect("link-time base"); + assert_eq!(base, 0x3000); + assert_eq!(tail.len(), 3); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_preserves_arithmetic_before_first_deref() { + let plan = read_plan(VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x3000), + ComputeStep::PushConstant(8), + ComputeStep::Add, + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTimeBase); + let (base, tail) = address + .link_time_base_and_runtime_tail() + .expect("link-time base"); + assert_eq!(base, 0x3000); + assert_eq!( + tail, + &[ + ComputeStep::PushConstant(8), + ComputeStep::Add, + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ] + ); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_keeps_absolute_address_value_direct() { + let plan = read_plan(VariableLocation::AbsoluteAddressValue( + AddressExpr::constant(0x2000), + )); + let materialized = plan.materialization_plan(&capabilities(false)); + + match materialized.materialization { + VariableMaterialization::DirectValue { + address_origin, + location, + } => { + assert_eq!(address_origin, Some(AddressOrigin::LinkTime)); + assert!(matches!( + location, + VariableLocation::AbsoluteAddressValue(_) + )); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_surfaces_piece_locations_without_first_piece_fallback() { + let plan = read_plan(VariableLocation::Pieces(vec![PieceLocation { + bit_offset: 0, + bit_size: 32, + location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), + }])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::Composite { pieces } => { + assert_eq!(pieces.len(), 1); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + #[test] fn absolute_address_value_lowers_without_user_memory_read() { let plan = read_plan(VariableLocation::AbsoluteAddressValue( @@ -940,6 +1288,7 @@ mod tests { let planned = plan.plan_access_path(&access).expect("field access"); assert_eq!(planned.name, "value.fd"); + assert_eq!(planned.access_path, access); assert_eq!(planned.dwarf_type, Some(int_type)); assert_eq!( planned.location, @@ -948,6 +1297,13 @@ mod tests { offset: -20, } ); + assert_eq!( + planned + .materialization_plan(&capabilities(true)) + .access_path + .segments, + vec![VariableAccessSegment::Field("fd".to_string())] + ); } #[test]