Skip to content

Commit c30ada0

Browse files
Merge pull request #3514 from AI-Hypercomputer:agagik-gemma2
PiperOrigin-RevId: 890730992
2 parents cd7a1eb + a910675 commit c30ada0

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

src/maxtext/checkpoint_conversion/to_huggingface.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,17 @@ def _validate_or_update_architecture(hf_config, max_config, override: bool):
179 179
if hf_value is None or mt_value is None:
180 180
continue
181 181

182+
# Special handling for Gemma 2 where local and global layers are bundled
183+
if max_config.model_name.startswith("gemma2") and hf_attr == "num_hidden_layers":
184+
if isinstance(mt_value, int):
185+
mt_value = mt_value * 2
186+
187+
# Handle vocab size padding
188+
if hf_attr == "vocab_size" and isinstance(mt_value, int) and isinstance(hf_value, int):
189+
# MaxText often pads vocab size to a multiple of 128 or 256 for TPU efficiency
190+
if mt_value >= hf_value and (mt_value - hf_value) < 256:
191+
mt_value = hf_value
192+
182 193
# Compare values (with tolerance for floats)
183 194
is_match = False
184 195
if isinstance(hf_value, float) or isinstance(mt_value, float):

0 commit comments

Comments
 (0)