Skip to content

Commit dbc4d58

Browse files
Merge pull request #3551 from CIeNET-International:charlesli/synthetic_dataset
PiperOrigin-RevId: 895503401
2 parents 44fc6d0 + 7985492 commit dbc4d58

1 file changed

Lines changed: 14 additions & 0 deletions

File tree

tests/integration/train_tests.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ class TrainTests(unittest.TestCase):
7979
f"base_output_directory={_base_output_directory}",
8080
"run_name=runner_test",
8181
f"dataset_path={dataset_path}",
82+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
8283
"steps=2",
8384
"enable_checkpointing=False",
8485
"enable_goodput_recording=False",
@@ -93,6 +94,7 @@ class TrainTests(unittest.TestCase):
9394
f"base_output_directory={_base_output_directory}",
9495
"run_name=runner_test",
9596
f"dataset_path={dataset_path}",
97+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
9698
"steps=2",
9799
"ici_tensor_transpose_parallelism=4",
98100
"enable_goodput_recording=False",
@@ -105,6 +107,7 @@ class TrainTests(unittest.TestCase):
105107
f"base_output_directory={_base_output_directory}",
106108
"run_name=runner_test",
107109
f"dataset_path={dataset_path}",
110+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
108111
"quantization=int8",
109112
"steps=2",
110113
"enable_checkpointing=False",
@@ -118,6 +121,7 @@ class TrainTests(unittest.TestCase):
118121
f"base_output_directory={_base_output_directory}",
119122
"run_name=runner_test",
120123
f"dataset_path={dataset_path}",
124+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
121125
"quantization=fp8",
122126
"steps=2",
123127
"enable_checkpointing=False",
@@ -131,6 +135,7 @@ class TrainTests(unittest.TestCase):
131135
f"base_output_directory={_base_output_directory}",
132136
"run_name=runner_test",
133137
f"dataset_path={dataset_path}",
138+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
134139
"quantization=nanoo_fp8",
135140
"steps=2",
136141
"enable_checkpointing=False",
@@ -144,6 +149,7 @@ class TrainTests(unittest.TestCase):
144149
f"base_output_directory={_base_output_directory}",
145150
"run_name=runner_test",
146151
f"dataset_path={dataset_path}",
152+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
147153
"quantization=te_fp8_delayedscaling",
148154
"steps=2",
149155
"enable_checkpointing=False",
@@ -157,6 +163,7 @@ class TrainTests(unittest.TestCase):
157163
f"base_output_directory={_base_output_directory}",
158164
"run_name=runner_test",
159165
f"dataset_path={dataset_path}",
166+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
160167
"quantization=te_fp8_currentscaling",
161168
"steps=2",
162169
"enable_checkpointing=False",
@@ -170,6 +177,7 @@ class TrainTests(unittest.TestCase):
170177
f"base_output_directory={_base_output_directory}",
171178
"run_name=runner_test",
172179
f"dataset_path={dataset_path}",
180+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
173181
"quantization=te_mxfp8",
174182
"steps=2",
175183
"enable_checkpointing=False",
@@ -183,6 +191,7 @@ class TrainTests(unittest.TestCase):
183191
f"base_output_directory={_base_output_directory}",
184192
"run_name=runner_test",
185193
f"dataset_path={dataset_path}",
194+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
186195
"steps=2",
187196
"enable_checkpointing=False",
188197
"enable_goodput_recording=False",
@@ -334,6 +343,7 @@ def test_gpu_cudnn_flash_te(self):
334343
f"base_output_directory={self._base_output_directory}",
335344
"run_name=runner_test",
336345
f"dataset_path={self.dataset_path}",
346+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
337347
"steps=2",
338348
"enable_checkpointing=False",
339349
"enable_goodput_recording=False",
@@ -354,6 +364,7 @@ def test_gpu_context_parallelism(self):
354364
f"base_output_directory={self._base_output_directory}",
355365
"run_name=runner_test",
356366
f"dataset_path={self.dataset_path}",
367+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
357368
"steps=10",
358369
"enable_checkpointing=False",
359370
"enable_goodput_recording=False",
@@ -466,6 +477,7 @@ def test_gpu_cudnn_flash_jax(self):
466477
f"base_output_directory={self._base_output_directory}",
467478
"run_name=runner_test",
468479
f"dataset_path={self.dataset_path}",
480+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
469481
"steps=2",
470482
"enable_checkpointing=False",
471483
"enable_goodput_recording=False",
@@ -559,6 +571,7 @@ def test_gpu_packed_attention(self):
559571
f"base_output_directory={self._base_output_directory}",
560572
"run_name=runner_test",
561573
f"dataset_path={self.dataset_path}",
574+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
562575
"steps=10",
563576
"enable_checkpointing=False",
564577
"enable_goodput_recording=False",
@@ -583,6 +596,7 @@ def test_gpu_ring_attention(self):
583596
f"base_output_directory={self._base_output_directory}",
584597
"run_name=runner_test",
585598
f"dataset_path={self.dataset_path}",
599+
"dataset_type=synthetic", # use synthetic dataset_type to decrease training time
586600
"steps=10",
587601
"enable_checkpointing=False",
588602
"enable_goodput_recording=False",

0 commit comments

Comments (0)