@@ -140,11 +140,22 @@ def _validate_or_update_architecture(hf_config, max_config, override: bool):
   # Mapping from Hugging Face config attribute -> MaxText config attribute
   # Note: We use derived MaxText attributes (e.g. emb_dim) which account for scale factors.
   attributes_to_check = [
-      ("num_attention_heads" , "num_query_heads" ),
-      ("num_key_value_heads" , "num_kv_heads" ),
       ("hidden_size" , "emb_dim" ),
       ("intermediate_size" , "mlp_dim" ),
+      ("kv_lora_rank" , "kv_lora_rank" ),
+      ("moe_intermediate_size" , "moe_mlp_dim" ),
+      ("n_routed_experts" , "num_experts" ),
+      ("n_shared_experts" , "shared_experts" ),
+      ("num_attention_heads" , "num_query_heads" ),
+      ("num_experts" , "num_experts" ),
+      ("num_experts_per_tok" , "num_experts_per_tok" ),
       ("num_hidden_layers" , "num_decoder_layers" ),
+      ("num_key_value_heads" , "num_kv_heads" ),
+      ("num_local_experts" , "num_experts" ),
+      ("q_lora_rank" , "q_lora_rank" ),
+      ("qk_nope_head_dim" , "qk_nope_head_dim" ),
+      ("qk_rope_head_dim" , "qk_rope_head_dim" ),
+      ("v_head_dim" , "v_head_dim" ),
       ("vocab_size" , "vocab_size" ),
   ]
 
0 commit comments