@@ -2362,16 +2362,16 @@ def GEMMA4_MAXTEXT_TO_HF_PARAM_MAPPING(config, maxtext_config, scan_layers=False
23622362 f"{ text_base } .layers.{ i } .router.proj.weight" if num_experts > 1 else None for i in hf_indices
23632363 ],
23642364 f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_0" : [
2365- f"{ text_base } .layers.{ i } .moe .gate_up_proj" if num_experts > 1 else None for i in hf_indices
2365+ f"{ text_base } .layers.{ i } .experts .gate_up_proj" if num_experts > 1 else None for i in hf_indices
23662366 ],
23672367 f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_1" : [
2368- f"{ text_base } .layers.{ i } .moe .gate_up_proj" if num_experts > 1 else None for i in hf_indices
2368+ f"{ text_base } .layers.{ i } .experts .gate_up_proj" if num_experts > 1 else None for i in hf_indices
23692369 ],
23702370 f"{ prefix } -mlp-moe_block-MoeBlock_0-wo" : [
2371- f"{ text_base } .layers.{ i } .moe .down_proj" if num_experts > 1 else None for i in hf_indices
2371+ f"{ text_base } .layers.{ i } .experts .down_proj" if num_experts > 1 else None for i in hf_indices
23722372 ],
23732373 f"{ prefix } -mlp-moe_block-MoeBlock_0-per_expert_scale" : [
2374- f"{ text_base } .layers.{ i } .moe .per_expert_scale" if num_experts > 1 else None for i in hf_indices
2374+ f"{ text_base } .layers.{ i } .router .per_expert_scale" if num_experts > 1 else None for i in hf_indices
23752375 ],
23762376 f"{ prefix } -mlp-moe_block-shared_experts-wi_0-kernel" : [
23772377 f"{ text_base } .layers.{ i } .mlp.gate_proj.weight" if num_experts > 1 else None for i in hf_indices
@@ -2440,10 +2440,14 @@ def GEMMA4_MAXTEXT_TO_HF_PARAM_MAPPING(config, maxtext_config, scan_layers=False
24402440 f"{ prefix } -mlp-moe_block-MoeBlock_0-gate-kernel" : f"{ hf_prefix } .router.proj.weight"
24412441 if num_experts > 1
24422442 else None ,
2443- f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_0" : f"{ hf_prefix } .moe.gate_up_proj" if num_experts > 1 else None ,
2444- f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_1" : f"{ hf_prefix } .moe.gate_up_proj" if num_experts > 1 else None ,
2445- f"{ prefix } -mlp-moe_block-MoeBlock_0-wo" : f"{ hf_prefix } .moe.down_proj" if num_experts > 1 else None ,
2446- f"{ prefix } -mlp-moe_block-MoeBlock_0-per_expert_scale" : f"{ hf_prefix } .moe.per_expert_scale"
2443+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_0" : f"{ hf_prefix } .experts.gate_up_proj"
2444+ if num_experts > 1
2445+ else None ,
2446+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_1" : f"{ hf_prefix } .experts.gate_up_proj"
2447+ if num_experts > 1
2448+ else None ,
2449+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wo" : f"{ hf_prefix } .experts.down_proj" if num_experts > 1 else None ,
2450+ f"{ prefix } -mlp-moe_block-MoeBlock_0-per_expert_scale" : f"{ hf_prefix } .router.per_expert_scale"
24472451 if num_experts > 1
24482452 else None ,
24492453 f"{ prefix } -mlp-moe_block-shared_experts-wi_0-kernel" : f"{ hf_prefix } .mlp.gate_proj.weight"
@@ -2502,10 +2506,10 @@ def GEMMA4_MAXTEXT_TO_HF_PARAM_MAPPING(config, maxtext_config, scan_layers=False
25022506 f"{ prefix } -mlp-moe_block-MoeBlock_0-gate-kernel" : f"{ hf_prefix } .router.proj.weight"
25032507 if num_experts > 1
25042508 else None ,
2505- f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_0" : f"{ hf_prefix } .moe .gate_up_proj" if num_experts > 1 else None ,
2506- f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_1" : f"{ hf_prefix } .moe .gate_up_proj" if num_experts > 1 else None ,
2507- f"{ prefix } -mlp-moe_block-MoeBlock_0-wo" : f"{ hf_prefix } .moe .down_proj" if num_experts > 1 else None ,
2508- f"{ prefix } -mlp-moe_block-MoeBlock_0-per_expert_scale" : f"{ hf_prefix } .moe .per_expert_scale"
2509+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_0" : f"{ hf_prefix } .experts .gate_up_proj" if num_experts > 1 else None ,
2510+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wi_1" : f"{ hf_prefix } .experts .gate_up_proj" if num_experts > 1 else None ,
2511+ f"{ prefix } -mlp-moe_block-MoeBlock_0-wo" : f"{ hf_prefix } .experts .down_proj" if num_experts > 1 else None ,
2512+ f"{ prefix } -mlp-moe_block-MoeBlock_0-per_expert_scale" : f"{ hf_prefix } .router .per_expert_scale"
25092513 if num_experts > 1
25102514 else None ,
25112515 f"{ prefix } -mlp-moe_block-shared_experts-wi_0-kernel" : f"{ hf_prefix } .mlp.gate_proj.weight"
0 commit comments