@@ -968,20 +968,27 @@ def _make_resume_builder(stub_resource_provider, stub_test_config_builder, tmp_p
968968 )
969969
970970
971- def test_build_resume_raises_without_metadata (stub_resource_provider , stub_test_config_builder , tmp_path ):
972- """resume=True when only the folder exists (no metadata.json) raises DatasetGenerationError .
971+ def test_build_resume_starts_fresh_without_metadata (stub_resource_provider , stub_test_config_builder , tmp_path , caplog ):
972+ """resume=True when only the folder exists (no metadata.json) logs an info message and starts fresh .
973973
974974 This covers the case where a run was interrupted before any batch completed — the
975975 folder was created by _write_builder_config but metadata.json was never written.
976+ Previously this raised DatasetGenerationError; now it logs an info message and restarts from batch 0.
976977 """
977978 # Pre-create the folder with content so resolved_dataset_name(resume=True) returns "dataset"
978979 dataset_dir = tmp_path / "dataset"
979980 dataset_dir .mkdir ()
980981 (dataset_dir / "builder_config.json" ).write_text ("{}" ) # non-empty, no metadata
981982
982983 builder = _make_resume_builder (stub_resource_provider , stub_test_config_builder , tmp_path )
983- with pytest .raises (DatasetGenerationError , match = "metadata.json not found" ):
984- builder .build (num_records = 4 , resume = True )
984+ with caplog .at_level (logging .INFO ):
985+ with patch .object (builder , "_run_model_health_check_if_needed" ):
986+ with patch .object (builder , "_run_batch" ):
987+ with patch .object (builder .batch_manager , "finish" ):
988+ # With no metadata.json, build() resets resume to False internally and takes the normal (non-resume) path
989+ builder .build (num_records = 4 , resume = True )
990+
991+ assert any ("interrupted before any batch completed" in record .message for record in caplog .records )
985992
986993
987994def test_build_resume_raises_on_num_records_mismatch (stub_resource_provider , stub_test_config_builder , tmp_path ):
@@ -1132,18 +1139,31 @@ def test_build_async_resume_logs_warning_when_already_complete(
11321139 assert any ("already complete" in record .message for record in caplog .records )
11331140
11341141
1135- def test_build_async_resume_raises_without_metadata (stub_resource_provider , stub_test_config_builder , tmp_path ):
1136- """Async resume raises DatasetGenerationError when metadata.json is missing."""
1142+ def test_build_async_resume_starts_fresh_without_metadata (
1143+ stub_resource_provider , stub_test_config_builder , tmp_path , caplog
1144+ ):
1145+ """Async resume with no metadata.json logs an info message and starts fresh.
1146+
1147+ Previously this raised DatasetGenerationError; now it logs an info message and restarts from row group 0.
1148+ The log is emitted in build() before dispatching to _build_async, so mocking _build_async
1149+ does not suppress the message.
1150+ """
11371151 dataset_dir = tmp_path / "dataset"
11381152 dataset_dir .mkdir ()
11391153 (dataset_dir / "builder_config.json" ).write_text ("{}" )
11401154
11411155 builder = _make_resume_builder (stub_resource_provider , stub_test_config_builder , tmp_path )
11421156
1143- with patch .object (builder_mod , "DATA_DESIGNER_ASYNC_ENGINE" , True ):
1144- with patch .object (builder , "_run_model_health_check_if_needed" ):
1145- with pytest .raises (DatasetGenerationError , match = "metadata.json not found" ):
1146- builder .build (num_records = 4 , resume = True )
1157+ with caplog .at_level (logging .INFO ):
1158+ with patch .object (builder_mod , "DATA_DESIGNER_ASYNC_ENGINE" , True ):
1159+ with patch .object (builder , "_run_model_health_check_if_needed" ):
1160+ with patch .object (builder , "_build_async" , return_value = True ) as mock_async :
1161+ builder .build (num_records = 4 , resume = True )
1162+
1163+ # _build_async is called with resume=False because the no-metadata path resets the flag
1164+ _ , kwargs = mock_async .call_args
1165+ assert kwargs .get ("resume" ) is False
1166+ assert any ("interrupted before any batch completed" in record .message for record in caplog .records )
11471167
11481168
11491169def test_build_async_resume_already_complete_does_not_run_after_generation_processors (
0 commit comments