|
1 | | -# Copyright 2023–2025 Google LLC |
| 1 | +# Copyright 2023–2026 Google LLC |
2 | 2 | # |
3 | | -# Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | -# you may not use this file except in compliance with the License. |
5 | | -# You may obtain a copy of the License at |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
6 | 6 | # |
7 | | -# https://www.apache.org/licenses/LICENSE-2.0 |
| 7 | +# https://www.apache.org/licenses/LICENSE-2.0 |
8 | 8 | # |
9 | | -# Unless required by applicable law or agreed to in writing, software |
10 | | -# distributed under the License is distributed on an "AS IS" BASIS, |
11 | | -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | -# See the License for the specific language governing permissions and |
13 | | -# limitations under the License. |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
14 | 14 |
|
15 | 15 | """ |
16 | 16 | This config defines the architectural configurations of the Hugging Face version of a model. |
|
215 | 215 | query_pre_attn_scalar=144, |
216 | 216 | ) |
217 | 217 |
|
# Architecture of Qwen2.5-1.5B, expressed as a Hugging Face Qwen2Config.
# NOTE(review): values presumably mirror the upstream checkpoint's
# config.json (e.g. 28 layers, GQA with 12 query / 2 KV heads, tied
# input/output embeddings) — confirm against the published model card.
qwen25_1_5b_config = transformers.Qwen2Config(
    # Embedding / vocabulary.
    vocab_size=151936,
    tie_word_embeddings=True,
    # Transformer stack dimensions.
    hidden_size=1536,
    intermediate_size=8960,
    num_hidden_layers=28,
    # Grouped-query attention: 12 query heads share 2 key/value heads.
    num_attention_heads=12,
    num_key_value_heads=2,
    attention_bias=True,
    # Activation, normalization, and positional encoding.
    hidden_act="silu",
    rms_norm_eps=1e-06,
    max_position_embeddings=32768,
    rope_theta=1000000.0,
    # Default parameter dtype for loading.
    torch_dtype="bfloat16",
)
| 233 | + |
218 | 234 | qwen25_7b_config = transformers.Qwen2Config( |
219 | 235 | vocab_size=152064, |
220 | 236 | hidden_size=3584, |
|
990 | 1006 | "gemma3-4b": gemma3_4b_config, |
991 | 1007 | "gemma3-12b": gemma3_12b_config, |
992 | 1008 | "gemma3-27b": gemma3_27b_config, |
| 1009 | + "qwen2.5-1.5b": qwen25_1_5b_config, |
993 | 1010 | "qwen2.5-7b": qwen25_7b_config, |
994 | 1011 | "qwen2.5-14b": qwen25_14b_config, |
995 | 1012 | "qwen3-0.6b": qwen3_0_6b_config, |
|
0 commit comments