-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathir_generator.ml
More file actions
3397 lines (3088 loc) · 160 KB
/
ir_generator.ml
File metadata and controls
3397 lines (3088 loc) · 160 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
* Copyright 2025 Multikernel Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*)
(** IR Generator - AST to IR Lowering
This module implements the lowering from typed AST to IR, including:
- Expression and statement lowering
- Control flow graph construction
- Built-in function expansion
- Safety check insertion
- Map operation lowering
*)
open Ast
open Ir
open Loop_analysis
module StringSet = Set.Make(String)
(** Context for IR generation *)
type ir_context = {
(* Next available temporary variable ID *)
mutable next_temp_id: int;
(* Current basic block being built *)
mutable current_block: ir_instruction list;
(* All basic blocks generated *)
mutable blocks: ir_basic_block list;
(* Next block ID *)
mutable next_block_id: int;
(* Current stack usage *)
mutable stack_usage: int;
(* Map declarations in scope *)
maps: (string, ir_map_def) Hashtbl.t;
(* Function being processed *)
mutable current_function: string option;
(* Symbol table reference *)
symbol_table: Symbol_table.symbol_table;
(* Helper function names to avoid tail call conversion *)
helper_functions: (string, unit) Hashtbl.t;
(* Assignment optimization info *)
mutable assignment_optimizations: Map_assignment.optimization_info option;
(* Constant environment for loop analysis *)
mutable const_env: Loop_analysis.const_env option;
mutable in_bpf_loop_callback: bool; (* New field to track bpf_loop context *)
mutable is_userspace: bool; (* New field to track if the program is userspace *)
mutable in_try_block: bool; (* New field to track if we're inside a try block *)
(* Track variable names to their original declared type names *)
variable_declared_types: (string, string) Hashtbl.t; (* variable_name -> original_type_name *)
(* Track function parameters to avoid allocating registers for them *)
function_parameters: (string, ir_type) Hashtbl.t; (* param_name -> param_type *)
(* Track global variables for proper access *)
global_variables: (string, ir_global_variable) Hashtbl.t; (* global_var_name -> global_var *)
(* Track variables that originate from map accesses *)
map_origin_variables: (string, (string * ir_value * (ir_value_desc * ir_type))) Hashtbl.t; (* var_name -> (map_name, key, underlying_info) *)
(* Track inferred variable types for proper lookups *)
variable_types: (string, ir_type) Hashtbl.t; (* var_name -> ir_type *)
mutable current_program_type: program_type option;
}
(** Create new IR generation context *)
let create_context ?(global_variables = []) ?(helper_functions = []) symbol_table = {
next_temp_id = 0;
current_block = [];
blocks = [];
next_block_id = 0;
stack_usage = 0;
maps = Hashtbl.create 16;
current_function = None;
symbol_table;
assignment_optimizations = None;
const_env = None;
in_bpf_loop_callback = false;
is_userspace = false;
in_try_block = false;
variable_declared_types = Hashtbl.create 32;
function_parameters = Hashtbl.create 32;
global_variables = (let tbl = Hashtbl.create 16 in
List.iter (fun gv -> Hashtbl.add tbl gv.global_var_name gv) global_variables;
tbl);
map_origin_variables = Hashtbl.create 32;
variable_types = Hashtbl.create 32;
current_program_type = None;
helper_functions = (let tbl = Hashtbl.create 16 in
List.iter (fun helper_name -> Hashtbl.add tbl helper_name ()) helper_functions;
tbl);
}
(** Generate a new temporary variable name *)
let generate_temp_variable ctx base_name =
let temp_name = Printf.sprintf "__%s_%d" base_name ctx.next_temp_id in
ctx.next_temp_id <- ctx.next_temp_id + 1;
temp_name
(** Helper function to generate temporary variable and create IR value in one step *)
let allocate_temp_variable ctx base_name ir_type pos =
let temp_name = generate_temp_variable ctx base_name in
make_ir_value (IRTempVariable temp_name) ir_type pos
(** Create new basic block *)
let create_basic_block ctx label =
let block_id = ctx.next_block_id in
ctx.next_block_id <- ctx.next_block_id + 1;
let block = make_ir_basic_block label (List.rev ctx.current_block) block_id in
ctx.blocks <- block :: ctx.blocks;
ctx.current_block <- [];
block
(** Analyze assignment patterns for optimization *)
let analyze_assignment_patterns ctx (ast: declaration list) =
let assignments = Map_assignment.extract_map_assignments_from_ast ast in
let optimization_info = Map_assignment.analyze_assignment_optimizations assignments in
ctx.assignment_optimizations <- Some optimization_info;
optimization_info
(** Add instruction to current block *)
let emit_instruction ctx instr =
ctx.current_block <- instr :: ctx.current_block;
ctx.stack_usage <- ctx.stack_usage + instr.instr_stack_usage
(** Emit variable declaration - takes an ir_value to preserve IRVariable vs IRTempVariable *)
let emit_variable_decl_val ctx dest_val ir_type init_expr_opt pos =
let instr = make_ir_instruction (IRVariableDecl (dest_val, ir_type, init_expr_opt)) pos in
emit_instruction ctx instr
(** Emit variable declaration for a user-level variable by name *)
let emit_variable_decl ctx var_name ir_type init_expr_opt pos =
let dest_val = make_ir_value (IRVariable var_name) ir_type pos in
emit_variable_decl_val ctx dest_val ir_type init_expr_opt pos
(** Expand ring buffer operations to IR instructions *)
let expand_ringbuf_operation ctx ringbuf_name operation arg_vals pos =
(* Get the ring buffer variable - could be local or global *)
let (ringbuf_val, ringbuf_type) =
if Hashtbl.mem ctx.global_variables ringbuf_name then
(* Global variable *)
let global_var = Hashtbl.find ctx.global_variables ringbuf_name in
(make_ir_value (IRVariable ringbuf_name) global_var.global_var_type pos, global_var.global_var_type)
else
(* Local variable - look up in symbol table *)
(match Symbol_table.lookup_symbol ctx.symbol_table ringbuf_name with
| Some symbol ->
(match symbol.kind with
| Symbol_table.Variable var_ast_type ->
let ringbuf_type = ast_type_to_ir_type_with_context ctx.symbol_table var_ast_type in
(make_ir_value (IRVariable ringbuf_name) ringbuf_type pos, ringbuf_type)
| _ -> failwith ("Variable is not a ringbuf: " ^ ringbuf_name))
| None -> failwith ("Ringbuf variable not found in symbol table: " ^ ringbuf_name))
in
match operation with
| "reserve" ->
(* reserve() returns a pointer to the value type *)
let result_type = match ringbuf_type with
| IRRingbuf (value_type, _) -> IRPointer (value_type, make_bounds_info ())
| _ -> failwith ("Variable is not a ringbuf type")
in
(* Create a special IR expression that directly represents the ringbuf reserve call *)
(* This will be converted to the proper C call without intermediate assignments *)
let result_val = allocate_temp_variable ctx "ringbuf_reserve" result_type pos in
let ringbuf_op = RingbufReserve result_val in
let instr = make_ir_instruction (IRRingbufOp (ringbuf_val, ringbuf_op)) pos in
emit_instruction ctx instr;
result_val
| "submit" ->
(* submit(data_pointer) returns i32 success code *)
let data_val = match arg_vals with
| [data] -> data
| _ -> failwith ("Ring buffer submit() requires exactly one argument")
in
let result_val = allocate_temp_variable ctx "ringbuf_submit" IRI32 pos in
let ringbuf_op = RingbufSubmit data_val in
let instr = make_ir_instruction (IRRingbufOp (ringbuf_val, ringbuf_op)) pos in
emit_instruction ctx instr;
result_val
| "discard" ->
(* discard(data_pointer) returns i32 success code *)
let data_val = match arg_vals with
| [data] -> data
| _ -> failwith ("Ring buffer discard() requires exactly one argument")
in
let result_val = allocate_temp_variable ctx "ringbuf_discard" IRI32 pos in
let ringbuf_op = RingbufDiscard data_val in
let instr = make_ir_instruction (IRRingbufOp (ringbuf_val, ringbuf_op)) pos in
emit_instruction ctx instr;
result_val
| "on_event" ->
(* on_event(handler) returns i32 success code *)
let handler_name = match arg_vals with
| [handler_val] ->
(* Extract function name from the handler value *)
(match handler_val.value_desc with
| IRFunctionRef name -> name
| IRVariable name -> name (* Function variable *)
| _ -> failwith ("Ring buffer on_event() requires a function argument"))
| _ -> failwith ("Ring buffer on_event() requires exactly one argument")
in
let result_val = allocate_temp_variable ctx "ringbuf_on_event" IRI32 pos in
let ringbuf_op = RingbufOnEvent handler_name in
let instr = make_ir_instruction (IRRingbufOp (ringbuf_val, ringbuf_op)) pos in
emit_instruction ctx instr;
result_val
| _ ->
failwith ("Unknown ring buffer operation: " ^ operation)
(** Generate bounds information for types *)
let generate_bounds_info ast_type = match ast_type with
| Ast.Array (_, size) -> make_bounds_info ~min_size:size ~max_size:size ()
| Ast.Pointer _ -> make_bounds_info ~nullable:true ()
| _ -> make_bounds_info ()
(** Lower AST literals to IR values *)
let lower_literal lit pos =
let ir_lit = IRLiteral lit in
let ir_type = match lit with
| IntLit (_, _) -> IRU32 (* Default integer type *)
| StringLit s -> IRStr (max 1 (String.length s)) (* String literals get IRStr type *)
| CharLit _ -> IRChar
| BoolLit _ -> IRBool
| NullLit ->
let bounds = make_bounds_info ~nullable:true () in
IRPointer (IRU32, bounds) (* null literal as nullable pointer to u32 *)
| NoneLit -> IRU32 (* none literal as sentinel u32 value *)
| ArrayLit init_style ->
(* Handle enhanced array literal lowering *)
(match init_style with
| ZeroArray ->
(* [] - zero initialize, size determined by context *)
IRArray (IRU32, 0, make_bounds_info ())
| FillArray fill_lit ->
(* [0] - fill entire array with single value, size from context *)
let element_ir_type = match fill_lit with
| IntLit _ -> IRU32
| BoolLit _ -> IRBool
| CharLit _ -> IRChar
| StringLit _ -> IRPointer (IRU8, make_bounds_info ~nullable:false ())
| NullLit ->
let bounds = make_bounds_info ~nullable:true () in
IRPointer (IRU32, bounds)
| NoneLit -> IRU32 (* none literal as sentinel u32 value *)
| ArrayLit _ -> IRU32 (* Nested arrays default to u32 *)
in
IRArray (element_ir_type, 0, make_bounds_info ()) (* Size resolved during type unification *)
| ExplicitArray literals ->
(* [a,b,c] - explicit values, zero-fill rest *)
let element_count = List.length literals in
if element_count = 0 then
IRArray (IRU32, 0, make_bounds_info ())
else
let first_lit = List.hd literals in
let element_ir_type = match first_lit with
| IntLit _ -> IRU32
| BoolLit _ -> IRBool
| CharLit _ -> IRChar
| StringLit _ -> IRPointer (IRU8, make_bounds_info ~nullable:false ())
| ArrayLit _ -> IRU32 (* Nested arrays default to u32 *)
| NullLit ->
let bounds = make_bounds_info ~nullable:true () in
IRPointer (IRU32, bounds)
| NoneLit -> IRU32 (* none literal as sentinel u32 value *)
in
let bounds_info = make_bounds_info ~min_size:element_count ~max_size:element_count () in
IRArray (element_ir_type, element_count, bounds_info))
in
make_ir_value ir_lit ir_type pos
(** Lower AST binary operators to IR *)
let lower_binary_op = function
| Add -> IRAdd | Sub -> IRSub | Mul -> IRMul | Div -> IRDiv | Mod -> IRMod
| Eq -> IREq | Ne -> IRNe | Lt -> IRLt | Le -> IRLe | Gt -> IRGt | Ge -> IRGe
| And -> IRAnd | Or -> IROr
(** Lower AST unary operators to IR *)
let lower_unary_op = function
| Not -> IRNot
| Neg -> IRNeg
| Deref -> IRDeref
| AddressOf -> IRAddressOf
(** Convert context field C type to IR type *)
let c_type_to_ir_type = function
| "__u8*" -> IRPointer (IRU8, make_bounds_info ~nullable:false ())
| "__u16*" -> IRPointer (IRU16, make_bounds_info ~nullable:false ())
| "__u32*" -> IRPointer (IRU32, make_bounds_info ~nullable:false ())
| "__u64*" -> IRPointer (IRU64, make_bounds_info ~nullable:false ())
| "__u8" -> IRU8
| "__u16" -> IRU16
| "__u32" -> IRU32
| "__u64" -> IRU64
| "void*" -> IRPointer (IRU8, make_bounds_info ~nullable:false ())
| c_type -> failwith ("Unsupported context field C type: " ^ c_type)
(** Unified AST literal to IR type conversion *)
let literal_to_ir_type = function
| IntLit _ -> IRU32
| BoolLit _ -> IRBool
| CharLit _ -> IRChar
| StringLit _ -> IRPointer (IRU8, make_bounds_info ~nullable:false ())
| NullLit -> IRPointer (IRU32, make_bounds_info ~nullable:true ())
| NoneLit -> IRU32
| ArrayLit _ -> IRU32 (* Default for arrays *)
(** Unified AST to IR type conversion for basic types *)
let ast_basic_type_to_ir_type = function
| Ast.U8 -> IRU8
| Ast.U16 -> IRU16
| Ast.U32 -> IRU32
| Ast.U64 -> IRU64
| Ast.I8 -> IRI8
| Ast.I16 -> IRI16
| Ast.I32 -> IRI32
| Ast.I64 -> IRI64
| Ast.Bool -> IRBool
| Ast.Char -> IRChar
| _ -> failwith "Not a basic type"
(** Helper to add maps to context hashtable *)
let add_maps_to_context ctx maps =
List.iter (fun (map_def : ir_map_def) ->
Hashtbl.add ctx.maps map_def.map_name map_def
) maps
(** Helper to copy maps between contexts *)
let copy_maps_to_context source_ctx target_ctx =
Hashtbl.iter (fun map_name map_def ->
Hashtbl.add target_ctx.maps map_name map_def
) source_ctx.maps
(** Helper to extract kernel struct name from @struct_ops attribute **)
let extract_struct_ops_kernel_name attributes =
List.fold_left (fun acc attr ->
match attr with
| Ast.AttributeWithArg ("struct_ops", name) -> name
| _ -> acc
) "" attributes
let ast_struct_has_field ast struct_name field_name =
List.exists (function
| Ast.StructDecl struct_def when struct_def.Ast.struct_name = struct_name ->
List.exists (fun (name, _) -> name = field_name) struct_def.Ast.struct_fields
| _ -> false
) ast
let impl_block_has_static_field impl_block field_name =
List.exists (function
| Ast.ImplStaticField (name, _) when name = field_name -> true
| _ -> false
) impl_block.Ast.impl_items
let normalize_struct_ops_instance_name name =
let buffer = Buffer.create (String.length name * 2) in
let is_uppercase ch = ch >= 'A' && ch <= 'Z' in
let is_lowercase ch = ch >= 'a' && ch <= 'z' in
let is_digit ch = ch >= '0' && ch <= '9' in
let add_separator_if_needed idx ch =
if idx > 0 && is_uppercase ch then
let prev = name.[idx - 1] in
let next_is_lowercase = idx + 1 < String.length name && is_lowercase name.[idx + 1] in
if is_lowercase prev || is_digit prev || (is_uppercase prev && next_is_lowercase) then
Buffer.add_char buffer '_'
in
String.iteri (fun idx ch ->
add_separator_if_needed idx ch;
let normalized =
if is_uppercase ch then Char.lowercase_ascii ch
else if is_lowercase ch || is_digit ch || ch = '_' then ch
else '_'
in
Buffer.add_char buffer normalized
) name;
Buffer.contents buffer
let generate_default_struct_ops_name instance_name =
let max_len = 15 in
let normalized = normalize_struct_ops_instance_name instance_name in
if String.length normalized <= max_len then normalized
else
let parts = List.filter (fun part -> part <> "") (String.split_on_char '_' normalized) in
match parts with
| [] -> String.sub normalized 0 max_len
| first :: rest ->
let abbreviated =
match rest with
| [] -> first
| _ ->
let initials = rest |> List.map (fun part -> String.make 1 part.[0]) |> String.concat "" in
first ^ "_" ^ initials
in
if String.length abbreviated <= max_len then abbreviated
else String.sub abbreviated 0 max_len
let should_lower_as_implicit_tail_call ctx name =
let is_function_pointer =
Hashtbl.mem ctx.function_parameters name ||
match Hashtbl.find_opt ctx.variable_types name with
| Some (IRFunctionPointer _) -> true
| _ -> false
in
if is_function_pointer || Hashtbl.mem ctx.helper_functions name then
false
else
match ctx.current_function, ctx.current_program_type with
| Some _, Some Ast.StructOps -> false
| Some current_func_name, Some _ ->
let caller_is_attributed =
try Symbol_table.lookup_function ctx.symbol_table current_func_name <> None
with _ -> false
in
let target_is_attributed =
try Symbol_table.lookup_function ctx.symbol_table name <> None
with _ -> false
in
caller_is_attributed && target_is_attributed
| _ -> false
(** Map struct names to their corresponding context types *)
let struct_name_to_context_type = function
| "xdp_md" -> Some "xdp"
| "__sk_buff" -> Some "tc"
| "pt_regs" -> Some "kprobe"
(* trace_event_raw_* structs are regular structs, not context types *)
| _ -> None
(** Determine result type for arrow access expressions *)
let determine_arrow_access_type ctx obj_val field expr_type_opt =
match obj_val.val_type with
| IRPointer (IRStruct (struct_name, _), _) ->
(* Check if this is a context struct *)
(match struct_name_to_context_type struct_name with
| Some ctx_type_str ->
(* Use field mapping to get precise type information *)
(match Kernelscript_context.Context_codegen.get_context_field_c_type ctx_type_str field with
| Some c_type -> c_type_to_ir_type c_type
| None -> failwith ("Unknown context field: " ^ field ^ " for context type: " ^ ctx_type_str))
| None ->
(* Regular struct field access *)
(match expr_type_opt with
| Some ast_type -> ast_type_to_ir_type_with_context ctx.symbol_table ast_type
| None -> IRU32))
| _ ->
(* Non-context types - use expression type annotation *)
(match expr_type_opt with
| Some ast_type -> ast_type_to_ir_type_with_context ctx.symbol_table ast_type
| None -> IRU32)
(** Generate bounds check for array access *)
let generate_array_bounds_check ctx array_val index_val pos =
match array_val.val_type with
| IRArray (_, size, _) ->
let bounds_check = {
value = index_val;
min_bound = 0;
max_bound = size - 1;
check_type = ArrayAccess;
} in
let instr = make_ir_instruction
(IRBoundsCheck (index_val, 0, size - 1))
~bounds_checks:[bounds_check]
~verifier_hints:[BoundsChecked]
pos
in
emit_instruction ctx instr
| _ -> ()
(** Map context field names to IR access types using BTF-integrated context codegen *)
(* No longer needed - we use BTF field names directly *)
(** Handle context field access with comprehensive BTF support *)
let handle_context_field_access_comprehensive ctx_type _obj_val field result_val expr_pos =
(* Check if field exists in BTF-integrated context codegen *)
match Kernelscript_context.Context_codegen.get_context_field_c_type ctx_type field with
| Some _c_type ->
(* Field exists in BTF - generate direct field access using BTF field name *)
let instr = make_ir_instruction
(IRContextAccess (result_val, ctx_type, field))
expr_pos
in
Some instr
| None ->
(* Field doesn't exist in BTF *)
None
(** Expand map operations *)
let expand_map_operation ctx map_name operation key_val value_val_opt pos =
let map_def = Hashtbl.find ctx.maps map_name in
let map_val = make_ir_value (IRMapRef map_name)
(IRPointer (IRStruct ("map", []), make_bounds_info ())) pos in
match operation with
| "lookup" ->
(* Map lookup returns pointer to value type, not value type itself *)
let result_val = allocate_temp_variable ctx "map_lookup"
(IRPointer (map_def.map_value_type, make_bounds_info ())) pos in
let instr = make_ir_instruction
(IRMapLoad (map_val, key_val, result_val, MapLookup))
~verifier_hints:[HelperCall "map_lookup_elem"]
pos
in
emit_instruction ctx instr;
result_val
| "update" ->
let value_val = match value_val_opt with
| Some v -> v
| None -> failwith "Map update requires value"
in
let instr = make_ir_instruction
(IRMapStore (map_val, key_val, value_val, MapUpdate))
~verifier_hints:[HelperCall "map_update_elem"]
pos
in
emit_instruction ctx instr;
(* Return success value *)
make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 pos
| "delete" ->
let instr = make_ir_instruction
(IRMapDelete (map_val, key_val))
~verifier_hints:[HelperCall "map_delete_elem"]
pos
in
emit_instruction ctx instr;
make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 pos
| _ -> failwith ("Unknown map operation: " ^ operation)
(** Lower AST expressions to IR values *)
let rec lower_expression ctx (expr : Ast.expr) =
match expr.expr_desc with
| Ast.Literal lit ->
lower_literal lit expr.expr_pos
| Ast.Identifier name ->
(* Check if this is a map identifier *)
if Hashtbl.mem ctx.maps name then
(* For map identifiers, create a map reference *)
let map_type = IRPointer (IRU8, make_bounds_info ()) in (* Maps are represented as pointers *)
make_ir_value (IRMapRef name) map_type expr.expr_pos
else
(* Check if this variable originates from a map access *)
(match Hashtbl.find_opt ctx.map_origin_variables name with
| Some (map_name, key, underlying_info) ->
(* This variable originates from a map access - recreate the IRMapAccess *)
let map_def = Hashtbl.find ctx.maps map_name in
{ value_desc = IRMapAccess (map_name, key, underlying_info);
val_type = map_def.map_value_type;
stack_offset = None;
bounds_checked = false;
val_pos = expr.expr_pos }
| None ->
(* Regular variable or function reference *)
(match expr.expr_type with
| Some (Function (param_types, return_type)) ->
(* Function references should be converted to function references *)
let ir_param_types = List.map ast_type_to_ir_type param_types in
let ir_return_type = ast_type_to_ir_type return_type in
let func_type = IRFunctionPointer (ir_param_types, ir_return_type) in
make_ir_value (IRFunctionRef name) func_type expr.expr_pos
| Some (ProgramRef _) ->
(* Program references should be converted to string literals containing the program name *)
make_ir_value (IRLiteral (StringLit name)) IRU32 expr.expr_pos
| _ ->
(* Regular variable lookup *)
if Hashtbl.mem ctx.function_parameters name then
let param_type = Hashtbl.find ctx.function_parameters name in
make_ir_value (IRVariable name) param_type expr.expr_pos
else if Hashtbl.mem ctx.global_variables name then
let global_var = Hashtbl.find ctx.global_variables name in
make_ir_value (IRVariable name) global_var.global_var_type expr.expr_pos
else
(* Check symbol table for various types of identifiers *)
(match Symbol_table.lookup_symbol ctx.symbol_table name with
| Some symbol ->
(match symbol.kind with
| Symbol_table.EnumConstant (enum_name, Some value) ->
(* Preserve enum constants as identifiers *)
let ir_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> IRU32
in
(* Use the inferred type directly - no special handling for action types *)
let final_ir_type = ir_type in
make_ir_value (IREnumConstant (enum_name, name, value)) final_ir_type expr.expr_pos
| Symbol_table.EnumConstant (_, None) ->
(* Enum constant without value - treat as variable *)
let ir_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> failwith ("Untyped identifier: " ^ name)
in
make_ir_value (IRVariable name) ir_type expr.expr_pos
| Symbol_table.TypeDef _ ->
(* This is a type definition (like impl blocks) - treat as variable *)
let ir_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type_with_context ctx.symbol_table ast_type
| None -> IRStruct (name, []) (* Default to struct type for impl blocks *)
in
make_ir_value (IRVariable name) ir_type expr.expr_pos
| _ ->
(* Other symbol types - treat as variable *)
let ir_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> failwith ("Untyped identifier: " ^ name)
in
make_ir_value (IRVariable name) ir_type expr.expr_pos)
| None ->
(* Symbol not found - treat as regular variable *)
let ir_type =
(* Always prioritize the tracked variable type from declaration *)
match Hashtbl.find_opt ctx.variable_types name with
| Some tracked_type -> tracked_type
| None ->
(* Fall back to expression type annotation *)
(match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None ->
(* Final fallback to symbol table lookup *)
(match Symbol_table.lookup_symbol ctx.symbol_table name with
| Some symbol ->
(match symbol.kind with
| Symbol_table.Variable var_ast_type ->
ast_type_to_ir_type_with_context ctx.symbol_table var_ast_type
| _ -> failwith ("Untyped identifier: " ^ name))
| None -> failwith ("Untyped identifier: " ^ name)))
in
make_ir_value (IRVariable name) ir_type expr.expr_pos)))
| Ast.ConfigAccess (config_name, field_name) ->
(* Handle config access like config.field_name *)
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> IRU32 (* Default type for config fields *)
in
let result_val = allocate_temp_variable ctx "config_access" result_type expr.expr_pos in
(* Generate new IRConfigAccess instruction *)
let config_access_instr = make_ir_instruction
(IRConfigAccess (config_name, field_name, result_val))
expr.expr_pos
in
emit_instruction ctx config_access_instr;
result_val
| Ast.TailCall (name, _args) ->
(* This shouldn't be reached during normal IR generation *)
(* Tail calls are handled specifically in return statements *)
failwith ("Tail call to " ^ name ^ " should only appear in return statements")
| Ast.ModuleCall module_call ->
(* Module calls are handled by userspace code generation, not IR *)
failwith ("Module call to " ^ module_call.module_name ^ "." ^ module_call.function_name ^ " should be handled in userspace code generation")
| Ast.Call (callee_expr, args) ->
let arg_vals = List.map (lower_expression ctx) args in
(* Check if this is a void function call *)
let is_void_call = match expr.expr_type with
| Some Ast.Void -> true
| _ -> false
in
(* Determine call type based on callee expression *)
(match callee_expr.expr_desc with
| Ast.Identifier name ->
(* Check if this is a variable holding a function pointer or a direct function call *)
if name = "register" then
(* Special handling for register() builtin function *)
handle_register_builtin_call ctx args expr.expr_pos ()
else if Hashtbl.mem ctx.function_parameters name ||
(Hashtbl.mem ctx.variable_types name &&
match Hashtbl.find ctx.variable_types name with
| IRFunctionPointer _ -> true
| _ -> false) then
(* This is a variable holding a function pointer - use FunctionPointerCall *)
let callee_val = lower_expression ctx callee_expr in
if is_void_call then
(* Void function pointer call - no return value *)
let instr = make_ir_instruction
(IRCall (FunctionPointerCall callee_val, arg_vals, None))
expr.expr_pos
in
emit_instruction ctx instr;
(* Return a dummy value for void calls - this should not be used *)
make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 expr.expr_pos
else
(* Non-void function pointer call *)
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> IRU32
in
let result_val = allocate_temp_variable ctx "func_ptr_call" result_type expr.expr_pos in
let instr = make_ir_instruction
(IRCall (FunctionPointerCall callee_val, arg_vals, Some result_val))
expr.expr_pos
in
emit_instruction ctx instr;
result_val
else
(* This is a direct function call *)
if is_void_call then
(* Void function call - no return value *)
let instr = make_ir_instruction
(IRCall (DirectCall name, arg_vals, None))
expr.expr_pos
in
emit_instruction ctx instr;
(* Return a dummy value for void calls - this should not be used *)
make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 expr.expr_pos
else
(* Non-void function call *)
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> IRU32
in
let result_val = allocate_temp_variable ctx "func_call" result_type expr.expr_pos in
let instr = make_ir_instruction
(IRCall (DirectCall name, arg_vals, Some result_val))
expr.expr_pos
in
emit_instruction ctx instr;
result_val
| Ast.FieldAccess ({expr_desc = Ast.Identifier obj_name; _}, method_name) ->
(* Method call (e.g., ringbuf.operation()) *)
if Hashtbl.mem ctx.maps obj_name then
(* Handle map operations *)
let key_val = if List.length arg_vals > 0 then List.hd arg_vals
else make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 expr.expr_pos in
let value_val_opt = if List.length arg_vals > 1 then Some (List.nth arg_vals 1) else None in
expand_map_operation ctx obj_name method_name key_val value_val_opt expr.expr_pos
else
(* Check if this is a local or global variable that supports method calls *)
let var_type_opt =
(* First check tracked variable types *)
match Hashtbl.find_opt ctx.variable_types obj_name with
| Some ir_type -> Some ir_type
| None ->
(* Check global variables *)
if Hashtbl.mem ctx.global_variables obj_name then
let global_var = Hashtbl.find ctx.global_variables obj_name in
Some global_var.global_var_type
else
(* Fall back to symbol table lookup *)
(match Symbol_table.lookup_symbol ctx.symbol_table obj_name with
| Some symbol ->
(match symbol.kind with
| Symbol_table.Variable var_ast_type ->
Some (ast_type_to_ir_type_with_context ctx.symbol_table var_ast_type)
| _ -> None)
| None -> None)
in
(match var_type_opt with
| Some (IRRingbuf (_, _)) ->
(* This is a ringbuf object that supports method calls *)
expand_ringbuf_operation ctx obj_name method_name arg_vals expr.expr_pos
| Some var_type ->
failwith ("Method call '" ^ method_name ^ "' not supported on variable '" ^ obj_name ^ "' of type: " ^ (string_of_ir_type var_type))
| None ->
failwith ("Unknown method call: " ^ obj_name ^ "." ^ method_name))
| _ ->
(* Function pointer call - use FunctionPointerCall target *)
let callee_val = lower_expression ctx callee_expr in
(* Use the arg_vals that were already calculated at the beginning of the Call case *)
if is_void_call then
(* Void function pointer call - no return value *)
let instr = make_ir_instruction
(IRCall (FunctionPointerCall callee_val, arg_vals, None))
expr.expr_pos
in
emit_instruction ctx instr;
(* Return a dummy value for void calls - this should not be used *)
make_ir_value (IRLiteral (IntLit (Ast.Signed64 0L, None))) IRU32 expr.expr_pos
else
(* Non-void function pointer call *)
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None -> IRU32
in
let result_val = allocate_temp_variable ctx "func_ptr_call" result_type expr.expr_pos in
let instr = make_ir_instruction
(IRCall (FunctionPointerCall callee_val, arg_vals, Some result_val))
expr.expr_pos
in
emit_instruction ctx instr;
result_val)
| Ast.ArrayAccess (array_expr, index_expr) ->
(* Check if this is map access first, before calling lower_expression on array *)
(match array_expr.expr_desc with
| Ast.Identifier map_name when Hashtbl.mem ctx.maps map_name ->
(* This is map access - handle it specially *)
let index_val = lower_expression ctx index_expr in
let lookup_result = expand_map_operation ctx map_name "lookup" index_val None expr.expr_pos in
(* Use the pointer type returned by expand_map_operation, not the value type *)
{ value_desc = IRMapAccess (map_name, index_val, (lookup_result.value_desc, lookup_result.val_type));
val_type = lookup_result.val_type; (* Use the pointer type from lookup_result *)
stack_offset = None;
bounds_checked = false;
val_pos = expr.expr_pos }
| _ ->
(* Regular array access *)
let array_val = lower_expression ctx array_expr in
let index_val = lower_expression ctx index_expr in
(* Generate bounds check *)
generate_array_bounds_check ctx array_val index_val expr.expr_pos;
let element_type = match array_val.val_type with
| IRArray (elem_type, _, _) -> elem_type
| IRStr _ -> IRChar (* String indexing returns char *)
| _ -> failwith "Array access on non-array type"
in
let result_val = allocate_temp_variable ctx "array_access" element_type expr.expr_pos in
(match array_val.val_type with
| IRStr _ ->
(* For strings, generate direct indexing: str.data[index] *)
let index_expr = make_ir_expr (IRBinOp (array_val, IRAdd, index_val)) element_type expr.expr_pos in
(* For strings, we need to emit a variable declaration and assignment *)
emit_variable_decl_val ctx result_val element_type (Some index_expr) expr.expr_pos
| _ ->
(* For arrays, generate pointer arithmetic and load *)
let ptr_val = allocate_temp_variable ctx "array_ptr"
(IRPointer (element_type, make_bounds_info ())) expr.expr_pos in
(* ptr = &array[index] *)
let ptr_expr = make_ir_expr (IRBinOp (array_val, IRAdd, index_val))
ptr_val.val_type expr.expr_pos in
emit_variable_decl_val ctx ptr_val ptr_val.val_type (Some ptr_expr) expr.expr_pos;
(* result = *ptr *)
let load_expr = make_ir_expr (IRValue ptr_val) element_type expr.expr_pos in
emit_variable_decl_val ctx result_val element_type (Some load_expr) expr.expr_pos);
result_val)
| Ast.FieldAccess (obj_expr, field) ->
let obj_val = lower_expression ctx obj_expr in
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type_with_context ctx.symbol_table ast_type
| None -> IRU32
in
let result_val = allocate_temp_variable ctx "field_access" result_type expr.expr_pos in
(* Handle field access for different types *)
(match obj_val.val_type with
| IRStruct (struct_name, _) ->
(* Check if this is a context struct *)
(match struct_name_to_context_type struct_name with
| Some ctx_type_str ->
(* Handle context field access using centralized mapping *)
(match handle_context_field_access_comprehensive ctx_type_str obj_val field result_val expr.expr_pos with
| Some instr ->
emit_instruction ctx instr;
result_val
| None ->
failwith ("Unknown context field: " ^ field ^ " for context type: " ^ ctx_type_str))
| None ->
(* Handle regular struct field access *)
let field_expr = make_ir_expr (IRFieldAccess (obj_val, field)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some field_expr) expr.expr_pos;
result_val)
| IRRingbuf (_, _) ->
(* Handle ring buffer field access - convert to method calls *)
(match field with
| "reserve" ->
(* reserve() - generate ring buffer reserve operation *)
let ringbuf_op = RingbufReserve result_val in
let instr = make_ir_instruction (IRRingbufOp (obj_val, ringbuf_op)) expr.expr_pos in
emit_instruction ctx instr;
result_val
| "submit" | "discard" | "on_event" ->
(* These operations require arguments, so should be handled as function calls, not field access *)
failwith ("Ring buffer operation '" ^ field ^ "' requires arguments and should be called as a function")
| _ ->
failwith ("Unknown ring buffer operation: " ^ field))
| _ ->
(* For userspace code, allow field access on other types (assuming it will be handled by C compilation) *)
if ctx.is_userspace then
let field_expr = make_ir_expr (IRFieldAccess (obj_val, field)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some field_expr) expr.expr_pos;
result_val
else
failwith ("Field access on type " ^ (string_of_ir_type obj_val.val_type) ^ " not supported in eBPF context"))
| Ast.ArrowAccess (obj_expr, field) ->
(* Arrow access (pointer->field) - similar to field access but for pointers *)
let obj_val = lower_expression ctx obj_expr in
(* Determine result type using dedicated type resolution *)
let result_type = determine_arrow_access_type ctx obj_val field expr.expr_type in
let result_val = allocate_temp_variable ctx "arrow_access" result_type expr.expr_pos in
(* Handle arrow access for different pointer types *)
(match obj_val.val_type with
| IRPointer (IRStruct (struct_name, _), _) ->
(* Check if this is a context struct pointer *)
(match struct_name_to_context_type struct_name with
| Some ctx_type_str ->
(* Handle context pointer field access *)
let corrected_result_val = result_val in
(match handle_context_field_access_comprehensive ctx_type_str obj_val field corrected_result_val expr.expr_pos with
| Some instr ->
emit_instruction ctx instr;
corrected_result_val
| None ->
failwith ("Unknown context field: " ^ field ^ " for context type: " ^ ctx_type_str))
| None ->
(* Regular struct pointer - use field access *)
let field_expr = make_ir_expr (IRFieldAccess (obj_val, field)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some field_expr) expr.expr_pos;
result_val)
| _ ->
(* For userspace code, allow arrow access on other types *)
if ctx.is_userspace then
let field_expr = make_ir_expr (IRFieldAccess (obj_val, field)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some field_expr) expr.expr_pos;
result_val
else
failwith ("Arrow access on type " ^ (string_of_ir_type obj_val.val_type) ^ " not supported in eBPF context"))
| Ast.BinaryOp (left_expr, op, right_expr) ->
let left_val = lower_expression ctx left_expr in
let right_val = lower_expression ctx right_expr in
let ir_op = lower_binary_op op in
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type ast_type
| None ->
(* For pointer arithmetic, determine the correct result type *)
(match left_val.val_type, ir_op, right_val.val_type with
(* Pointer - Pointer = size (pointer subtraction) *)
| IRPointer _, IRSub, IRPointer _ -> IRU64
(* Pointer + Integer = Pointer (pointer offset) *)
| IRPointer (t, bounds), (IRAdd | IRSub), _ -> IRPointer (t, bounds)
(* Integer + Pointer = Pointer (pointer offset) *)
| _, IRAdd, IRPointer (t, bounds) -> IRPointer (t, bounds)
(* Default to left operand type *)
| _ -> left_val.val_type)
in
let result_val = allocate_temp_variable ctx "binop" result_type expr.expr_pos in
let bin_expr = make_ir_expr (IRBinOp (left_val, ir_op, right_val)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some bin_expr) expr.expr_pos;
result_val
| Ast.UnaryOp (op, operand_expr) ->
let operand_val = lower_expression ctx operand_expr in
let ir_op = lower_unary_op op in
(* Calculate the correct result type based on the operation *)
let result_type = match op with
| AddressOf ->
(* &T -> *T (pointer to the operand type) *)
(* Special handling for map access: the result is a pointer to the map value type *)
(match operand_val.value_desc with
| IRMapAccess (_, _, _) ->
(* Map access: &stats should return a pointer to the map value type *)
IRPointer (operand_val.val_type, make_bounds_info ~nullable:true ())
| _ -> IRPointer (operand_val.val_type, make_bounds_info ~nullable:false ()))
| Deref ->
(* *T -> T (dereference the pointer to get the pointed-to type) *)
(match operand_val.val_type with
| IRPointer (inner_type, _) -> inner_type
| _ -> failwith ("Cannot dereference non-pointer type"))
| _ ->
(* For other unary ops (Not, Neg), result type is same as operand *)
operand_val.val_type
in
let result_val = allocate_temp_variable ctx "unop" result_type expr.expr_pos in
(* Handle all unary operations uniformly to avoid register reference issues *)
let un_expr = make_ir_expr (IRUnOp (ir_op, operand_val)) result_type expr.expr_pos in
emit_variable_decl_val ctx result_val result_type (Some un_expr) expr.expr_pos;
result_val
| Ast.StructLiteral (struct_name, field_assignments) ->
let result_type = match expr.expr_type with
| Some ast_type -> ast_type_to_ir_type_with_context ctx.symbol_table ast_type
| None -> IRStruct (struct_name, [])
in
let result_val = allocate_temp_variable ctx "struct_literal" result_type expr.expr_pos in