From b7d7668c99f2f9b1438bb8b2d5fd10c34e4aa028 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Thu, 22 May 2025 17:52:49 +0300 Subject: [PATCH 1/9] Feat: Add plan option to always compare against prod --- sqlmesh/core/config/plan.py | 2 + sqlmesh/core/context.py | 6 +- sqlmesh/core/context_diff.py | 45 +++++++++++++- sqlmesh/core/plan/builder.py | 2 +- tests/core/test_integration.py | 107 ++++++++++++++++++++++++++++++++- 5 files changed, 155 insertions(+), 7 deletions(-) diff --git a/sqlmesh/core/config/plan.py b/sqlmesh/core/config/plan.py index cac0b3fd70..456b5339db 100644 --- a/sqlmesh/core/config/plan.py +++ b/sqlmesh/core/config/plan.py @@ -20,6 +20,7 @@ class PlanConfig(BaseConfig): auto_apply: Whether to automatically apply the new plan after creation. use_finalized_state: Whether to compare against the latest finalized environment state, or to use whatever state the target environment is currently in. + always_compare_against_prod: Whether to always compare against production when planning, even if the target environment exists. """ forward_only: bool = False @@ -30,3 +31,4 @@ class PlanConfig(BaseConfig): no_prompts: bool = True auto_apply: bool = False use_finalized_state: bool = False + always_compare_against_prod: bool = False diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index de5ee6ede9..902577b01f 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1480,7 +1480,7 @@ def plan_builder( snapshots = self._snapshots(models_override) context_diff = self._context_diff( - environment or c.PROD, + environment=environment, snapshots=snapshots, create_from=create_from, force_no_diff=restate_models is not None @@ -2630,11 +2630,12 @@ def _context_diff( diff_rendered: bool = False, ) -> ContextDiff: environment = Environment.sanitize_name(environment) + if force_no_diff: return ContextDiff.create_no_diff(environment, self.state_reader) return ContextDiff.create( - environment, + environment=environment, snapshots=snapshots or self.snapshots, create_from=create_from or c.PROD, state_reader=self.state_reader, @@ -2645,6 +2646,7 @@ def _context_diff( environment_statements=self._environment_statements, gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, infer_python_dependencies=self.config.infer_python_dependencies, + always_compare_against_prod=self.config.plan.always_compare_against_prod, ) def _destroy(self) -> None: diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 4212f328b1..4c48c4d29e 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -14,6 +14,8 @@ import sys import typing as t +import logging + from difflib import ndiff, unified_diff from functools import cached_property from sqlmesh.core import constants as c @@ -38,6 +40,8 @@ IGNORED_PACKAGES = {"sqlmesh", "sqlglot"} +logger = logging.getLogger(__name__) + class ContextDiff(PydanticModel): """ContextDiff is an object representing the difference between two environments. @@ -88,6 +92,8 @@ class ContextDiff(PydanticModel): """Environment statements.""" diff_rendered: bool = False """Whether the diff should compare raw vs rendered models""" + initial_environment: str = "" + """The initial target environment (e.g 'dev'), if the plan option `always_compare_to_prod` is set""" @classmethod def create( @@ -103,6 +109,8 @@ def create( environment_statements: t.Optional[t.List[EnvironmentStatements]] = [], gateway_managed_virtual_layer: bool = False, infer_python_dependencies: bool = True, + initial_environment: t.Optional[str] = None, + always_compare_against_prod: bool = False, ) -> ContextDiff: """Create a ContextDiff object. @@ -127,8 +135,17 @@ def create( Returns: The ContextDiff object. """ - environment = environment.lower() + initial_environment = environment + environment = _get_target_environment( + environment, state_reader, always_compare_against_prod + ) + env = state_reader.get_environment(environment) + initial_env = ( + env + if initial_environment == environment + else state_reader.get_environment(initial_environment) + ) create_from_env_exists = False if env is None or env.expired: @@ -222,6 +239,7 @@ def create( return ContextDiff( environment=environment, + initial_environment=initial_environment, is_new_environment=is_new_environment, is_unfinalized_environment=bool(env and not env.finalized_ts), normalize_environment_name=is_new_environment or bool(env and env.normalize_name), @@ -232,7 +250,9 @@ def create( modified_snapshots=modified_snapshots, snapshots=merged_snapshots, new_snapshots=new_snapshots, - previous_plan_id=env.plan_id if env and not is_new_environment else None, + previous_plan_id=initial_env.plan_id + if initial_env and not is_new_environment + else None, previously_promoted_snapshot_ids=previously_promoted_snapshot_ids, previous_finalized_snapshots=env.previous_finalized_snapshots if env else None, previous_requirements=env.requirements if env else {}, @@ -261,8 +281,9 @@ def create_no_diff(cls, environment: str, state_reader: StateReader) -> ContextD snapshots = state_reader.get_snapshots(env.snapshots) + environment = env.name return ContextDiff( - environment=env.name, + environment=environment, is_new_environment=False, is_unfinalized_environment=False, normalize_environment_name=env.normalize_name, @@ -281,6 +302,7 @@ def create_no_diff(cls, environment: str, state_reader: StateReader) -> ContextD previous_environment_statements=[], previous_gateway_managed_virtual_layer=env.gateway_managed, gateway_managed_virtual_layer=env.gateway_managed, + initial_environment=environment, ) @property @@ -479,6 +501,23 @@ def text_diff(self, name: str) -> str: return "" +def _get_target_environment( + environment: str, state_reader: StateReader, always_compare_against_prod: bool = False +) -> str: + if always_compare_against_prod: + prod = state_reader.get_environment(c.PROD) + if prod: + logger.warning( + f"Comparing against production environment instead of {environment}. Note that this may lead to " + "additional backfills as accumulated changes are still pushed to the target environment." + ) + environment = c.PROD + else: + environment = environment or c.PROD + + return environment.lower() + + def _build_requirements( provided_requirements: t.Dict[str, str], excluded_requirements: t.Set[str], diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index 27b81f5d74..76315de8c2 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -159,7 +159,7 @@ def __init__( self.override_end = end is not None self.environment_naming_info = EnvironmentNamingInfo.from_environment_catalog_mapping( environment_catalog_mapping or {}, - name=self._context_diff.environment, + name=self._context_diff.initial_environment, suffix_target=environment_suffix_target, normalize_name=self._context_diff.normalize_environment_name, gateway_managed=self._context_diff.gateway_managed_virtual_layer, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index bfc416596b..740e3445f2 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6,7 +6,7 @@ from datetime import timedelta from unittest import mock from unittest.mock import patch - +import logging import os import numpy as np # noqa: TID253 import pandas as pd # noqa: TID253 @@ -37,6 +37,7 @@ from sqlmesh.core.console import Console, get_console from sqlmesh.core.context import Context from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.config.plan import PlanConfig from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.environment import EnvironmentNamingInfo from sqlmesh.core.macros import macro @@ -6252,3 +6253,107 @@ def test_render_path_instead_of_model(tmp_path: Path): # Case 3: Render the model successfully assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' + + +@use_terminal_console +def test_plan_always_compare_against_prod(mocker: MockerFixture, tmp_path: Path): + def plan_with_output(ctx: Context, environment: str): + with patch.object(logger, "info") as mock_logger: + with capture_output() as output: + ctx.load() + ctx.plan(environment, no_prompts=True, auto_apply=True) + + # Facade logs info "Promoting environment {environment}" + assert mock_logger.call_args[0][1] == environment + + return output + + models_dir = tmp_path / "models" + + logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") + + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" + ) + + config = Config(plan=PlanConfig(always_compare_against_prod=True)) + ctx = Context(paths=[tmp_path], config=config) + + # Case 1: Neither prod nor dev exists, so dev is initialized + output = plan_with_output(ctx, "dev") + + assert """`dev` environment will be initialized""" in output.stdout + + # Case 2: Prod does not exist, so dev is updated + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" + ) + + plan = ctx.plan_builder("dev").build() + + assert plan.context_diff.initial_environment == "dev" + assert plan.context_diff.environment == "dev" + + output = plan_with_output(ctx, "dev") + + assert "Differences from the `dev` environment" in output.stdout + + # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod + output = plan_with_output(ctx, "prod") + + assert "`prod` environment will be initialized" in output.stdout + + # Case 4: Dev is updated with a breaking change, so plan comparisons moving forward should be against prod + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert plan.context_diff.initial_environment == "dev" + assert plan.context_diff.environment == "prod" + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + + assert "Differences from the `prod` environment" in output.stdout + + # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes + # so it's still classified as a breaking change + create_temp_file( + tmp_path, + models_dir / "a.sql", + "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col", + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert plan.context_diff.initial_environment == "dev" + assert plan.context_diff.environment == "prod" + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + + assert "Differences from the `prod` environment" in output.stdout + + assert ( + """MODEL ( + name test.a, ++ owner test, + kind FULL + ) + SELECT +- 5 AS col ++ 10 AS col""" + in output.stdout + ) From 15a0b81281b0be4352ca3456550e33f0c7eea72e Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Tue, 3 Jun 2025 16:49:48 +0300 Subject: [PATCH 2/9] PR Feedback 1 --- sqlmesh/core/context.py | 2 +- sqlmesh/core/context_diff.py | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 902577b01f..75d9d6bf46 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -2635,7 +2635,7 @@ def _context_diff( return ContextDiff.create_no_diff(environment, self.state_reader) return ContextDiff.create( - environment=environment, + environment=environment or c.PROD, snapshots=snapshots or self.snapshots, create_from=create_from or c.PROD, state_reader=self.state_reader, diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 4c48c4d29e..ac80752817 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -14,7 +14,6 @@ import sys import typing as t -import logging from difflib import ndiff, unified_diff from functools import cached_property @@ -40,8 +39,6 @@ IGNORED_PACKAGES = {"sqlmesh", "sqlglot"} -logger = logging.getLogger(__name__) - class ContextDiff(PydanticModel): """ContextDiff is an object representing the difference between two environments. @@ -135,7 +132,8 @@ def create( Returns: The ContextDiff object. """ - initial_environment = environment + initial_environment = environment.lower() + environment = _get_target_environment( environment, state_reader, always_compare_against_prod ) @@ -507,13 +505,7 @@ def _get_target_environment( if always_compare_against_prod: prod = state_reader.get_environment(c.PROD) if prod: - logger.warning( - f"Comparing against production environment instead of {environment}. Note that this may lead to " - "additional backfills as accumulated changes are still pushed to the target environment." - ) environment = c.PROD - else: - environment = environment or c.PROD return environment.lower() From 3b75e3c0829ff6f8ac712e6f0718f60bfc64c5a1 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 4 Jun 2025 10:50:08 +0300 Subject: [PATCH 3/9] PR Feedback 2, various fixes and improvements --- sqlmesh/core/console.py | 18 ++++++++++---- sqlmesh/core/context.py | 11 +++++---- sqlmesh/core/context_diff.py | 43 +++++++++++++++------------------- sqlmesh/core/plan/builder.py | 5 ++-- tests/core/test_integration.py | 30 ++++++++++++++---------- 5 files changed, 60 insertions(+), 47 deletions(-) diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py index e272442e67..aa03d3cc70 100644 --- a/sqlmesh/core/console.py +++ b/sqlmesh/core/console.py @@ -219,6 +219,7 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, + environment: t.Optional[str] = None, ) -> None: """Displays a summary of differences for the environment.""" @@ -645,6 +646,7 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, + environment: t.Optional[str] = None, ) -> None: pass @@ -1524,18 +1526,21 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, + environment: t.Optional[str] = None, ) -> None: """Shows a summary of the environment differences. Args: context_diff: The context diff to use to print the summary no_diff: Hide the actual environment statement differences. + environment: The initial target environment """ if context_diff.is_new_environment: + new_environment = environment or context_diff.environment msg = ( - f"\n`{context_diff.environment}` environment will be initialized" + f"\n`{new_environment}` environment will be initialized" if not context_diff.create_from_env_exists - else f"\nNew environment `{context_diff.environment}` will be created from `{context_diff.create_from}`" + else f"\nNew environment `{new_environment}` will be created from `{context_diff.create_from}`" ) self._print(Tree(f"[bold]{msg}\n")) if not context_diff.has_snapshot_changes: @@ -1786,6 +1791,7 @@ def _prompt_categorize( self.show_environment_difference_summary( plan.context_diff, no_diff=no_diff, + environment=plan_builder.environment_naming_info.name, ) if plan.context_diff.has_changes: @@ -2898,18 +2904,21 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, + environment: t.Optional[str] = None, ) -> None: """Shows a summary of the environment differences. Args: context_diff: The context diff to use to print the summary. no_diff: Hide the actual environment statements differences. + environment: The initial target environment """ if context_diff.is_new_environment: + new_environment = environment or context_diff.environment msg = ( - f"\n**`{context_diff.environment}` environment will be initialized**" + f"\n**`{new_environment}` environment will be initialized**" if not context_diff.create_from_env_exists - else f"\n**New environment `{context_diff.environment}` will be created from `{context_diff.create_from}`**" + else f"\n**New environment `{new_environment}` will be created from `{context_diff.create_from}`**" ) self._print(msg) if not context_diff.has_snapshot_changes: @@ -3501,6 +3510,7 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, + environment: t.Optional[str] = None, ) -> None: self._write("Environment Difference Summary:") diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 75d9d6bf46..6920eb02c1 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1480,13 +1480,14 @@ def plan_builder( snapshots = self._snapshots(models_override) context_diff = self._context_diff( - environment=environment, + environment or c.PROD, snapshots=snapshots, create_from=create_from, force_no_diff=restate_models is not None or (backfill_models is not None and not backfill_models), ensure_finalized_snapshots=self.config.plan.use_finalized_state, diff_rendered=diff_rendered, + always_compare_against_prod=self.config.plan.always_compare_against_prod, ) modified_model_names = { *context_diff.modified_snapshots, @@ -1520,6 +1521,7 @@ def plan_builder( return self.PLAN_BUILDER_TYPE( context_diff=context_diff, + environment=environment or c.PROD, start=start, end=end, execution_time=execution_time, @@ -1642,6 +1644,7 @@ def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> b self.console.show_environment_difference_summary( context_diff, no_diff=not detailed, + environment=environment, ) if context_diff.has_changes: self.console.show_model_difference_summary( @@ -2628,14 +2631,14 @@ def _context_diff( force_no_diff: bool = False, ensure_finalized_snapshots: bool = False, diff_rendered: bool = False, + always_compare_against_prod: bool = False, ) -> ContextDiff: environment = Environment.sanitize_name(environment) - if force_no_diff: return ContextDiff.create_no_diff(environment, self.state_reader) return ContextDiff.create( - environment=environment or c.PROD, + environment, snapshots=snapshots or self.snapshots, create_from=create_from or c.PROD, state_reader=self.state_reader, @@ -2646,7 +2649,7 @@ def _context_diff( environment_statements=self._environment_statements, gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, infer_python_dependencies=self.config.infer_python_dependencies, - always_compare_against_prod=self.config.plan.always_compare_against_prod, + always_compare_against_prod=always_compare_against_prod, ) def _destroy(self) -> None: diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index ac80752817..90049d6d09 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -89,8 +89,6 @@ class ContextDiff(PydanticModel): """Environment statements.""" diff_rendered: bool = False """Whether the diff should compare raw vs rendered models""" - initial_environment: str = "" - """The initial target environment (e.g 'dev'), if the plan option `always_compare_to_prod` is set""" @classmethod def create( @@ -106,7 +104,6 @@ def create( environment_statements: t.Optional[t.List[EnvironmentStatements]] = [], gateway_managed_virtual_layer: bool = False, infer_python_dependencies: bool = True, - initial_environment: t.Optional[str] = None, always_compare_against_prod: bool = False, ) -> ContextDiff: """Create a ContextDiff object. @@ -133,33 +130,34 @@ def create( The ContextDiff object. """ initial_environment = environment.lower() - - environment = _get_target_environment( - environment, state_reader, always_compare_against_prod - ) - - env = state_reader.get_environment(environment) - initial_env = ( - env - if initial_environment == environment - else state_reader.get_environment(initial_environment) - ) + initial_env = state_reader.get_environment(initial_environment) create_from_env_exists = False - if env is None or env.expired: - env = state_reader.get_environment(create_from.lower()) + if initial_env is None or initial_env.expired: + initial_env = state_reader.get_environment(create_from.lower()) - if not env and create_from != c.PROD: + if not initial_env and create_from != c.PROD: get_console().log_warning( f"The environment name '{create_from}' was passed to the `plan` command's `--create-from` argument, but '{create_from}' does not exist. Initializing new environment '{environment}' from scratch." ) is_new_environment = True - create_from_env_exists = env is not None + create_from_env_exists = initial_env is not None previously_promoted_snapshot_ids = set() else: is_new_environment = False - previously_promoted_snapshot_ids = {s.snapshot_id for s in env.promoted_snapshots} + previously_promoted_snapshot_ids = { + s.snapshot_id for s in initial_env.promoted_snapshots + } + + # Find the proper environment to diff against, this might be different than the "initial" (i.e user provided) environment + # e.g it will default to prod if the plan option `always_compare_against_prod` is set. + environment = _get_diff_environment(environment, state_reader, always_compare_against_prod) + env = ( + initial_env + if (initial_environment == environment) + else state_reader.get_environment(environment) + ) environment_snapshot_infos = [] if env: @@ -237,7 +235,6 @@ def create( return ContextDiff( environment=environment, - initial_environment=initial_environment, is_new_environment=is_new_environment, is_unfinalized_environment=bool(env and not env.finalized_ts), normalize_environment_name=is_new_environment or bool(env and env.normalize_name), @@ -279,9 +276,8 @@ def create_no_diff(cls, environment: str, state_reader: StateReader) -> ContextD snapshots = state_reader.get_snapshots(env.snapshots) - environment = env.name return ContextDiff( - environment=environment, + environment=env.name, is_new_environment=False, is_unfinalized_environment=False, normalize_environment_name=env.normalize_name, @@ -300,7 +296,6 @@ def create_no_diff(cls, environment: str, state_reader: StateReader) -> ContextD previous_environment_statements=[], previous_gateway_managed_virtual_layer=env.gateway_managed, gateway_managed_virtual_layer=env.gateway_managed, - initial_environment=environment, ) @property @@ -499,7 +494,7 @@ def text_diff(self, name: str) -> str: return "" -def _get_target_environment( +def _get_diff_environment( environment: str, state_reader: StateReader, always_compare_against_prod: bool = False ) -> str: if always_compare_against_prod: diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index 76315de8c2..c8acb8f22d 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -6,7 +6,7 @@ from collections import defaultdict from functools import cached_property - +from sqlmesh.core import constants as c from sqlmesh.core.console import PlanBuilderConsole, get_console from sqlmesh.core.config import ( AutoCategorizationMode, @@ -117,6 +117,7 @@ def __init__( interval_end_per_model: t.Optional[t.Dict[str, int]] = None, console: t.Optional[PlanBuilderConsole] = None, user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None, + environment: str = c.PROD, ): self._context_diff = context_diff self._no_gaps = no_gaps @@ -159,7 +160,7 @@ def __init__( self.override_end = end is not None self.environment_naming_info = EnvironmentNamingInfo.from_environment_catalog_mapping( environment_catalog_mapping or {}, - name=self._context_diff.initial_environment, + name=environment, suffix_target=environment_suffix_target, normalize_name=self._context_diff.normalize_environment_name, gateway_managed=self._context_diff.gateway_managed_virtual_layer, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index 740e3445f2..fdf79ea36d 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6268,6 +6268,11 @@ def plan_with_output(ctx: Context, environment: str): return output + def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env: str): + plan_builder = ctx.plan_builder(input_env) + assert plan_builder.environment_naming_info.name == promote_env + assert plan_builder.build().context_diff.environment == diff_env + models_dir = tmp_path / "models" logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") @@ -6283,27 +6288,25 @@ def plan_with_output(ctx: Context, environment: str): output = plan_with_output(ctx, "dev") assert """`dev` environment will be initialized""" in output.stdout + assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="dev") # Case 2: Prod does not exist, so dev is updated create_temp_file( tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" ) - plan = ctx.plan_builder("dev").build() - - assert plan.context_diff.initial_environment == "dev" - assert plan.context_diff.environment == "dev" - output = plan_with_output(ctx, "dev") + assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="dev") assert "Differences from the `dev` environment" in output.stdout # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod output = plan_with_output(ctx, "prod") - assert "`prod` environment will be initialized" in output.stdout - # Case 4: Dev is updated with a breaking change, so plan comparisons moving forward should be against prod + assert_environments(ctx, input_env="prod", promote_env="prod", diff_env="prod") + + # Case 4: Dev is updated with a breaking change. Prod exists now so plan comparisons moving forward should be against prod create_temp_file( tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" ) @@ -6311,8 +6314,7 @@ def plan_with_output(ctx: Context, environment: str): plan = ctx.plan_builder("dev").build() - assert plan.context_diff.initial_environment == "dev" - assert plan.context_diff.environment == "prod" + assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="prod") assert ( next(iter(plan.context_diff.snapshots.values())).change_category @@ -6320,7 +6322,6 @@ def plan_with_output(ctx: Context, environment: str): ) output = plan_with_output(ctx, "dev") - assert "Differences from the `prod` environment" in output.stdout # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes @@ -6334,8 +6335,7 @@ def plan_with_output(ctx: Context, environment: str): plan = ctx.plan_builder("dev").build() - assert plan.context_diff.initial_environment == "dev" - assert plan.context_diff.environment == "prod" + assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="prod") assert ( next(iter(plan.context_diff.snapshots.values())).change_category @@ -6343,7 +6343,6 @@ def plan_with_output(ctx: Context, environment: str): ) output = plan_with_output(ctx, "dev") - assert "Differences from the `prod` environment" in output.stdout assert ( @@ -6357,3 +6356,8 @@ def plan_with_output(ctx: Context, environment: str): + 10 AS col""" in output.stdout ) + + # Case 6: Check that we can still run Context::diff() against any environment + for environment in ["dev", "prod"]: + context_diff = ctx._context_diff(environment) + assert context_diff.environment == environment From 713744a5a07e9462e8dce2e52da530dbaa422814 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 4 Jun 2025 14:45:13 +0300 Subject: [PATCH 4/9] Add documentation --- docs/guides/configuration.md | 109 +++++++++++++++++++++++++++++++++++ sqlmesh/core/context_diff.py | 2 +- 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 561996594c..34779e1216 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -381,6 +381,115 @@ Example showing default values: ) ``` + +### Always comparing against production + +By default, SQLMesh compares the current state of project files to the target `` environment when `sqlmesh plan ` is run. However, a common expectation is that local changes should always be compared to the production environment. + +The `always_compare_against_prod` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment; If that does not exist, SQLMesh will fall back to comparing against the target environment. + +**NOTE:**: Upon succesfull plan application, changes are still promoted to the target `` environment. + +=== "YAML" + + ```yaml linenums="1" + plan: + always_compare_against_prod: True + ``` + +=== "Python" + + ```python linenums="1" + from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, + PlanConfig, + ) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect=), + plan=PlanConfig( + always_compare_against_prod=True, + ), + ) + ``` + +#### Change Categorization Example + +Consider this scenario with `always_compare_against_prod` enabled: + +1. Initial state in `prod`: +```sql +MODEL (name sqlmesh_example.test_model, kind FULL); +SELECT 1 AS col +``` + +1. First (breaking) change in `dev`: +```sql +MODEL (name test.a, kind FULL); +SELECT 2 AS col +``` + +??? "Output plan example #1" + + ```bash + New environment `dev` will be created from `prod` + + Differences from the `prod` environment: + + Models: + └── Directly Modified: + └── sqlmesh_example__dev.test_model + + --- + +++ + + + kind FULL + ) + SELECT + - 1 AS col + + 2 AS col + ``` + +3. Second (metadata) change in `dev`: +```sql +MODEL (name test.a, kind FULL, owner 'John Doe'); +SELECT 5 AS col +``` + +??? "Output plan example #2" + + ```bash + Differences from the `prod` environment: + + Models: + └── Directly Modified: + └── sqlmesh_example__dev.test_model + + --- + + +++ + + @@ -1,8 +1,9 @@ + + MODEL ( + name sqlmesh_example.test_model, + + owner "John Doe", + kind FULL + ) + SELECT + - 1 AS col + + 2 AS col + + Directly Modified: sqlmesh_example__dev.test_model (Breaking) + Models needing backfill: + └── sqlmesh_example__dev.test_model: [full refresh] + ``` + +Even though the second change should have been a metadata change (thus not requiring a backfill), it will still be classified as a breaking change because the comparison is against production instead of the previous development state. This is intentional and may cause additional backfills as more changes are accumulated. + + ### Gateways The `gateways` configuration defines how SQLMesh should connect to the data warehouse, state backend, and scheduler. These options are in the [gateway](../reference/configuration.md#gateway) section of the configuration reference page. diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 90049d6d09..32674871a2 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -150,7 +150,7 @@ def create( s.snapshot_id for s in initial_env.promoted_snapshots } - # Find the proper environment to diff against, this might be different than the "initial" (i.e user provided) environment + # Find the proper environment to diff against, this might be different than the initial (i.e user provided) environment # e.g it will default to prod if the plan option `always_compare_against_prod` is set. environment = _get_diff_environment(environment, state_reader, always_compare_against_prod) env = ( From fd6a42c2b3960b660d79a715799ab957547f727e Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 4 Jun 2025 18:59:35 +0300 Subject: [PATCH 5/9] Rename initial_environment --- sqlmesh/core/context_diff.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 32674871a2..7b5d9e52f2 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -129,8 +129,8 @@ def create( Returns: The ContextDiff object. """ - initial_environment = environment.lower() - initial_env = state_reader.get_environment(initial_environment) + initial_environment_name = environment.lower() + initial_env = state_reader.get_environment(initial_environment_name) create_from_env_exists = False if initial_env is None or initial_env.expired: @@ -155,7 +155,7 @@ def create( environment = _get_diff_environment(environment, state_reader, always_compare_against_prod) env = ( initial_env - if (initial_environment == environment) + if (initial_environment_name == environment) else state_reader.get_environment(environment) ) From 3329ee1cee0e1af17fbf8d5240427b65ebb5b646 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Thu, 5 Jun 2025 11:17:59 +0300 Subject: [PATCH 6/9] Switch impl to always_init_from_prod --- docs/guides/configuration.md | 12 ++++---- docs/reference/configuration.md | 2 +- sqlmesh/core/config/plan.py | 4 +-- sqlmesh/core/console.py | 16 +++-------- sqlmesh/core/context.py | 7 ++--- sqlmesh/core/context_diff.py | 50 +++++++++++---------------------- tests/core/test_integration.py | 22 ++++----------- 7 files changed, 38 insertions(+), 75 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 34779e1216..e809741fde 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -386,7 +386,7 @@ Example showing default values: By default, SQLMesh compares the current state of project files to the target `` environment when `sqlmesh plan ` is run. However, a common expectation is that local changes should always be compared to the production environment. -The `always_compare_against_prod` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment; If that does not exist, SQLMesh will fall back to comparing against the target environment. +The `always_init_from_prod` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment; If that does not exist, SQLMesh will fall back to comparing against the target environment. **NOTE:**: Upon succesfull plan application, changes are still promoted to the target `` environment. @@ -394,7 +394,7 @@ The `always_compare_against_prod` boolean plan option can alter this behavior. W ```yaml linenums="1" plan: - always_compare_against_prod: True + always_init_from_prod: True ``` === "Python" @@ -416,7 +416,7 @@ The `always_compare_against_prod` boolean plan option can alter this behavior. W #### Change Categorization Example -Consider this scenario with `always_compare_against_prod` enabled: +Consider this scenario with `always_init_from_prod` enabled: 1. Initial state in `prod`: ```sql @@ -426,7 +426,7 @@ SELECT 1 AS col 1. First (breaking) change in `dev`: ```sql -MODEL (name test.a, kind FULL); +MODEL (name sqlmesh_example__dev.test_model, kind FULL); SELECT 2 AS col ``` @@ -454,13 +454,15 @@ SELECT 2 AS col 3. Second (metadata) change in `dev`: ```sql -MODEL (name test.a, kind FULL, owner 'John Doe'); +MODEL (name sqlmesh_example__dev.test_model, kind FULL, owner 'John Doe'); SELECT 5 AS col ``` ??? "Output plan example #2" ```bash + New environment `dev` will be created from `prod` + Differences from the `prod` environment: Models: diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 79484b50cc..dbf4d4bd2b 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -80,7 +80,7 @@ Configuration for the `sqlmesh plan` command. | `enable_preview` | Indicates whether to enable [data preview](../concepts/plans.md#data-preview) for forward-only models when targeting a development environment (Default: True, except for dbt projects where the target engine does not support cloning) | Boolean | N | | `no_diff` | Don't show diffs for changed models (Default: False) | boolean | N | | `no_prompts` | Disables interactive prompts in CLI (Default: True) | boolean | N | - +| `always_init_from_prod` | Always recreates the target environment from the environment specified in `create_from` (by default `prod`) (Default: False) | boolean | N | ## Run Configuration for the `sqlmesh run` command. Please note that this is only applicable when configured with the [builtin](#builtin) scheduler. diff --git a/sqlmesh/core/config/plan.py b/sqlmesh/core/config/plan.py index 456b5339db..f23955ec42 100644 --- a/sqlmesh/core/config/plan.py +++ b/sqlmesh/core/config/plan.py @@ -20,7 +20,7 @@ class PlanConfig(BaseConfig): auto_apply: Whether to automatically apply the new plan after creation. use_finalized_state: Whether to compare against the latest finalized environment state, or to use whatever state the target environment is currently in. - always_compare_against_prod: Whether to always compare against production when planning, even if the target environment exists. + always_init_from_prod: Whether to always recreate the target environment from the prod environment. """ forward_only: bool = False @@ -31,4 +31,4 @@ class PlanConfig(BaseConfig): no_prompts: bool = True auto_apply: bool = False use_finalized_state: bool = False - always_compare_against_prod: bool = False + always_init_from_prod: bool = False diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py index aa03d3cc70..a8cf4a6bb4 100644 --- a/sqlmesh/core/console.py +++ b/sqlmesh/core/console.py @@ -219,7 +219,6 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, - environment: t.Optional[str] = None, ) -> None: """Displays a summary of differences for the environment.""" @@ -646,7 +645,6 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, - environment: t.Optional[str] = None, ) -> None: pass @@ -1526,7 +1524,6 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, - environment: t.Optional[str] = None, ) -> None: """Shows a summary of the environment differences. @@ -1536,11 +1533,10 @@ def show_environment_difference_summary( environment: The initial target environment """ if context_diff.is_new_environment: - new_environment = environment or context_diff.environment msg = ( - f"\n`{new_environment}` environment will be initialized" + f"\n`{context_diff.environment}` environment will be initialized" if not context_diff.create_from_env_exists - else f"\nNew environment `{new_environment}` will be created from `{context_diff.create_from}`" + else f"\nNew environment `{context_diff.environment}` will be created from `{context_diff.create_from}`" ) self._print(Tree(f"[bold]{msg}\n")) if not context_diff.has_snapshot_changes: @@ -1791,7 +1787,6 @@ def _prompt_categorize( self.show_environment_difference_summary( plan.context_diff, no_diff=no_diff, - environment=plan_builder.environment_naming_info.name, ) if plan.context_diff.has_changes: @@ -2904,7 +2899,6 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, - environment: t.Optional[str] = None, ) -> None: """Shows a summary of the environment differences. @@ -2914,11 +2908,10 @@ def show_environment_difference_summary( environment: The initial target environment """ if context_diff.is_new_environment: - new_environment = environment or context_diff.environment msg = ( - f"\n**`{new_environment}` environment will be initialized**" + f"\n**`{context_diff.environment}` environment will be initialized**" if not context_diff.create_from_env_exists - else f"\n**New environment `{new_environment}` will be created from `{context_diff.create_from}`**" + else f"\n**New environment `{context_diff.environment}` will be created from `{context_diff.create_from}`**" ) self._print(msg) if not context_diff.has_snapshot_changes: @@ -3510,7 +3503,6 @@ def show_environment_difference_summary( self, context_diff: ContextDiff, no_diff: bool = True, - environment: t.Optional[str] = None, ) -> None: self._write("Environment Difference Summary:") diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 6920eb02c1..b748a72aab 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1487,7 +1487,7 @@ def plan_builder( or (backfill_models is not None and not backfill_models), ensure_finalized_snapshots=self.config.plan.use_finalized_state, diff_rendered=diff_rendered, - always_compare_against_prod=self.config.plan.always_compare_against_prod, + always_init_from_prod=self.config.plan.always_init_from_prod, ) modified_model_names = { *context_diff.modified_snapshots, @@ -1644,7 +1644,6 @@ def diff(self, environment: t.Optional[str] = None, detailed: bool = False) -> b self.console.show_environment_difference_summary( context_diff, no_diff=not detailed, - environment=environment, ) if context_diff.has_changes: self.console.show_model_difference_summary( @@ -2631,7 +2630,7 @@ def _context_diff( force_no_diff: bool = False, ensure_finalized_snapshots: bool = False, diff_rendered: bool = False, - always_compare_against_prod: bool = False, + always_init_from_prod: bool = False, ) -> ContextDiff: environment = Environment.sanitize_name(environment) if force_no_diff: @@ -2649,7 +2648,7 @@ def _context_diff( environment_statements=self._environment_statements, gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, infer_python_dependencies=self.config.infer_python_dependencies, - always_compare_against_prod=always_compare_against_prod, + always_init_from_prod=always_init_from_prod, ) def _destroy(self) -> None: diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 7b5d9e52f2..2025544f03 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -104,7 +104,7 @@ def create( environment_statements: t.Optional[t.List[EnvironmentStatements]] = [], gateway_managed_virtual_layer: bool = False, infer_python_dependencies: bool = True, - always_compare_against_prod: bool = False, + always_init_from_prod: bool = False, ) -> ContextDiff: """Create a ContextDiff object. @@ -129,35 +129,25 @@ def create( Returns: The ContextDiff object. """ - initial_environment_name = environment.lower() - initial_env = state_reader.get_environment(initial_environment_name) - + environment = environment.lower() + existing_env = state_reader.get_environment(environment) create_from_env_exists = False - if initial_env is None or initial_env.expired: - initial_env = state_reader.get_environment(create_from.lower()) - if not initial_env and create_from != c.PROD: + if existing_env is None or existing_env.expired or always_init_from_prod: + env = state_reader.get_environment(create_from.lower()) + + if not env and create_from != c.PROD: get_console().log_warning( f"The environment name '{create_from}' was passed to the `plan` command's `--create-from` argument, but '{create_from}' does not exist. Initializing new environment '{environment}' from scratch." ) is_new_environment = True - create_from_env_exists = initial_env is not None + create_from_env_exists = env is not None previously_promoted_snapshot_ids = set() else: + env = existing_env is_new_environment = False - previously_promoted_snapshot_ids = { - s.snapshot_id for s in initial_env.promoted_snapshots - } - - # Find the proper environment to diff against, this might be different than the initial (i.e user provided) environment - # e.g it will default to prod if the plan option `always_compare_against_prod` is set. - environment = _get_diff_environment(environment, state_reader, always_compare_against_prod) - env = ( - initial_env - if (initial_environment_name == environment) - else state_reader.get_environment(environment) - ) + previously_promoted_snapshot_ids = {s.snapshot_id for s in env.promoted_snapshots} environment_snapshot_infos = [] if env: @@ -233,6 +223,11 @@ def create( previous_environment_statements = state_reader.get_environment_statements(environment) + if existing_env and always_init_from_prod: + previous_plan_id: t.Optional[str] = existing_env.plan_id + else: + previous_plan_id = env.plan_id if env and not is_new_environment else None + return ContextDiff( environment=environment, is_new_environment=is_new_environment, @@ -245,9 +240,7 @@ def create( modified_snapshots=modified_snapshots, snapshots=merged_snapshots, new_snapshots=new_snapshots, - previous_plan_id=initial_env.plan_id - if initial_env and not is_new_environment - else None, + previous_plan_id=previous_plan_id, previously_promoted_snapshot_ids=previously_promoted_snapshot_ids, previous_finalized_snapshots=env.previous_finalized_snapshots if env else None, previous_requirements=env.requirements if env else {}, @@ -494,17 +487,6 @@ def text_diff(self, name: str) -> str: return "" -def _get_diff_environment( - environment: str, state_reader: StateReader, always_compare_against_prod: bool = False -) -> str: - if always_compare_against_prod: - prod = state_reader.get_environment(c.PROD) - if prod: - environment = c.PROD - - return environment.lower() - - def _build_requirements( provided_requirements: t.Dict[str, str], excluded_requirements: t.Set[str], diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index fdf79ea36d..8a875dd405 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6256,7 +6256,7 @@ def test_render_path_instead_of_model(tmp_path: Path): @use_terminal_console -def test_plan_always_compare_against_prod(mocker: MockerFixture, tmp_path: Path): +def test_plan_always_init_from_prod(tmp_path: Path): def plan_with_output(ctx: Context, environment: str): with patch.object(logger, "info") as mock_logger: with capture_output() as output: @@ -6268,11 +6268,6 @@ def plan_with_output(ctx: Context, environment: str): return output - def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env: str): - plan_builder = ctx.plan_builder(input_env) - assert plan_builder.environment_naming_info.name == promote_env - assert plan_builder.build().context_diff.environment == diff_env - models_dir = tmp_path / "models" logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") @@ -6281,14 +6276,13 @@ def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" ) - config = Config(plan=PlanConfig(always_compare_against_prod=True)) + config = Config(plan=PlanConfig(always_init_from_prod=True)) ctx = Context(paths=[tmp_path], config=config) # Case 1: Neither prod nor dev exists, so dev is initialized output = plan_with_output(ctx, "dev") assert """`dev` environment will be initialized""" in output.stdout - assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="dev") # Case 2: Prod does not exist, so dev is updated create_temp_file( @@ -6296,16 +6290,12 @@ def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env ) output = plan_with_output(ctx, "dev") - - assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="dev") - assert "Differences from the `dev` environment" in output.stdout + assert "`dev` environment will be initialized" in output.stdout # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod output = plan_with_output(ctx, "prod") assert "`prod` environment will be initialized" in output.stdout - assert_environments(ctx, input_env="prod", promote_env="prod", diff_env="prod") - # Case 4: Dev is updated with a breaking change. Prod exists now so plan comparisons moving forward should be against prod create_temp_file( tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" @@ -6314,14 +6304,13 @@ def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env plan = ctx.plan_builder("dev").build() - assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="prod") - assert ( next(iter(plan.context_diff.snapshots.values())).change_category == SnapshotChangeCategory.BREAKING ) output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout assert "Differences from the `prod` environment" in output.stdout # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes @@ -6335,14 +6324,13 @@ def assert_environments(ctx: Context, input_env: str, promote_env: str, diff_env plan = ctx.plan_builder("dev").build() - assert_environments(ctx, input_env="dev", promote_env="dev", diff_env="prod") - assert ( next(iter(plan.context_diff.snapshots.values())).change_category == SnapshotChangeCategory.BREAKING ) output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout assert "Differences from the `prod` environment" in output.stdout assert ( From 99e921a67b71d83a039cd3d28c6ffdb01847d6f3 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 6 Jun 2025 11:17:20 +0300 Subject: [PATCH 7/9] Rename to always_recreate_environment, PR feedback 3 --- docs/guides/configuration.md | 6 +++--- docs/reference/configuration.md | 2 +- sqlmesh/core/config/plan.py | 4 ++-- sqlmesh/core/console.py | 2 -- sqlmesh/core/context.py | 6 +++--- sqlmesh/core/context_diff.py | 6 +++--- sqlmesh/core/plan/builder.py | 2 +- tests/core/test_integration.py | 4 ++-- 8 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index e809741fde..a7be1b145b 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -386,7 +386,7 @@ Example showing default values: By default, SQLMesh compares the current state of project files to the target `` environment when `sqlmesh plan ` is run. However, a common expectation is that local changes should always be compared to the production environment. -The `always_init_from_prod` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment; If that does not exist, SQLMesh will fall back to comparing against the target environment. +The `always_recreate_environment` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment by recreating the target environment; If `prod` does not exist, SQLMesh will fall back to comparing against the target environment. **NOTE:**: Upon succesfull plan application, changes are still promoted to the target `` environment. @@ -394,7 +394,7 @@ The `always_init_from_prod` boolean plan option can alter this behavior. When en ```yaml linenums="1" plan: - always_init_from_prod: True + always_recreate_environment: True ``` === "Python" @@ -416,7 +416,7 @@ The `always_init_from_prod` boolean plan option can alter this behavior. When en #### Change Categorization Example -Consider this scenario with `always_init_from_prod` enabled: +Consider this scenario with `always_recreate_environment` enabled: 1. Initial state in `prod`: ```sql diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index dbf4d4bd2b..e00c9dee1f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -80,7 +80,7 @@ Configuration for the `sqlmesh plan` command. | `enable_preview` | Indicates whether to enable [data preview](../concepts/plans.md#data-preview) for forward-only models when targeting a development environment (Default: True, except for dbt projects where the target engine does not support cloning) | Boolean | N | | `no_diff` | Don't show diffs for changed models (Default: False) | boolean | N | | `no_prompts` | Disables interactive prompts in CLI (Default: True) | boolean | N | -| `always_init_from_prod` | Always recreates the target environment from the environment specified in `create_from` (by default `prod`) (Default: False) | boolean | N | +| `always_recreate_environment` | Always recreates the target environment from the environment specified in `create_from` (by default `prod`) (Default: False) | boolean | N | ## Run Configuration for the `sqlmesh run` command. Please note that this is only applicable when configured with the [builtin](#builtin) scheduler. diff --git a/sqlmesh/core/config/plan.py b/sqlmesh/core/config/plan.py index f23955ec42..fdfb7048a7 100644 --- a/sqlmesh/core/config/plan.py +++ b/sqlmesh/core/config/plan.py @@ -20,7 +20,7 @@ class PlanConfig(BaseConfig): auto_apply: Whether to automatically apply the new plan after creation. use_finalized_state: Whether to compare against the latest finalized environment state, or to use whatever state the target environment is currently in. - always_init_from_prod: Whether to always recreate the target environment from the prod environment. + always_recreate_environment: Whether to always recreate the target environment from the prod environment. """ forward_only: bool = False @@ -31,4 +31,4 @@ class PlanConfig(BaseConfig): no_prompts: bool = True auto_apply: bool = False use_finalized_state: bool = False - always_init_from_prod: bool = False + always_recreate_environment: bool = False diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py index a8cf4a6bb4..e272442e67 100644 --- a/sqlmesh/core/console.py +++ b/sqlmesh/core/console.py @@ -1530,7 +1530,6 @@ def show_environment_difference_summary( Args: context_diff: The context diff to use to print the summary no_diff: Hide the actual environment statement differences. - environment: The initial target environment """ if context_diff.is_new_environment: msg = ( @@ -2905,7 +2904,6 @@ def show_environment_difference_summary( Args: context_diff: The context diff to use to print the summary. no_diff: Hide the actual environment statements differences. - environment: The initial target environment """ if context_diff.is_new_environment: msg = ( diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index b748a72aab..f4f23213ab 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1487,7 +1487,7 @@ def plan_builder( or (backfill_models is not None and not backfill_models), ensure_finalized_snapshots=self.config.plan.use_finalized_state, diff_rendered=diff_rendered, - always_init_from_prod=self.config.plan.always_init_from_prod, + always_recreate_environment=self.config.plan.always_recreate_environment, ) modified_model_names = { *context_diff.modified_snapshots, @@ -2630,7 +2630,7 @@ def _context_diff( force_no_diff: bool = False, ensure_finalized_snapshots: bool = False, diff_rendered: bool = False, - always_init_from_prod: bool = False, + always_recreate_environment: bool = False, ) -> ContextDiff: environment = Environment.sanitize_name(environment) if force_no_diff: @@ -2648,7 +2648,7 @@ def _context_diff( environment_statements=self._environment_statements, gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, infer_python_dependencies=self.config.infer_python_dependencies, - always_init_from_prod=always_init_from_prod, + always_recreate_environment=always_recreate_environment, ) def _destroy(self) -> None: diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 2025544f03..65c3a223e9 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -104,7 +104,7 @@ def create( environment_statements: t.Optional[t.List[EnvironmentStatements]] = [], gateway_managed_virtual_layer: bool = False, infer_python_dependencies: bool = True, - always_init_from_prod: bool = False, + always_recreate_environment: bool = False, ) -> ContextDiff: """Create a ContextDiff object. @@ -133,7 +133,7 @@ def create( existing_env = state_reader.get_environment(environment) create_from_env_exists = False - if existing_env is None or existing_env.expired or always_init_from_prod: + if existing_env is None or existing_env.expired or always_recreate_environment: env = state_reader.get_environment(create_from.lower()) if not env and create_from != c.PROD: @@ -223,7 +223,7 @@ def create( previous_environment_statements = state_reader.get_environment_statements(environment) - if existing_env and always_init_from_prod: + if existing_env and always_recreate_environment: previous_plan_id: t.Optional[str] = existing_env.plan_id else: previous_plan_id = env.plan_id if env and not is_new_environment else None diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index c8acb8f22d..25a21e0844 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -160,7 +160,7 @@ def __init__( self.override_end = end is not None self.environment_naming_info = EnvironmentNamingInfo.from_environment_catalog_mapping( environment_catalog_mapping or {}, - name=environment, + name=self._context_diff.environment, suffix_target=environment_suffix_target, normalize_name=self._context_diff.normalize_environment_name, gateway_managed=self._context_diff.gateway_managed_virtual_layer, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index 8a875dd405..af62566971 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6256,7 +6256,7 @@ def test_render_path_instead_of_model(tmp_path: Path): @use_terminal_console -def test_plan_always_init_from_prod(tmp_path: Path): +def test_plan_always_recreate_environment(tmp_path: Path): def plan_with_output(ctx: Context, environment: str): with patch.object(logger, "info") as mock_logger: with capture_output() as output: @@ -6276,7 +6276,7 @@ def plan_with_output(ctx: Context, environment: str): tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" ) - config = Config(plan=PlanConfig(always_init_from_prod=True)) + config = Config(plan=PlanConfig(always_recreate_environment=True)) ctx = Context(paths=[tmp_path], config=config) # Case 1: Neither prod nor dev exists, so dev is initialized From fbe054341811885fb626a0fe5c2cf66036f47716 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 6 Jun 2025 11:47:53 +0300 Subject: [PATCH 8/9] Minor fixes --- sqlmesh/core/config/plan.py | 2 +- sqlmesh/core/context.py | 1 - sqlmesh/core/context_diff.py | 1 - sqlmesh/core/plan/builder.py | 3 +-- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sqlmesh/core/config/plan.py b/sqlmesh/core/config/plan.py index fdfb7048a7..df1ca44873 100644 --- a/sqlmesh/core/config/plan.py +++ b/sqlmesh/core/config/plan.py @@ -20,7 +20,7 @@ class PlanConfig(BaseConfig): auto_apply: Whether to automatically apply the new plan after creation. use_finalized_state: Whether to compare against the latest finalized environment state, or to use whatever state the target environment is currently in. - always_recreate_environment: Whether to always recreate the target environment from the prod environment. + always_recreate_environment: Whether to always recreate the target environment from the `create_from` environment. """ forward_only: bool = False diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index f4f23213ab..0450827d6e 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1521,7 +1521,6 @@ def plan_builder( return self.PLAN_BUILDER_TYPE( context_diff=context_diff, - environment=environment or c.PROD, start=start, end=end, execution_time=execution_time, diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 65c3a223e9..e9ab24183a 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -14,7 +14,6 @@ import sys import typing as t - from difflib import ndiff, unified_diff from functools import cached_property from sqlmesh.core import constants as c diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py index 25a21e0844..27b81f5d74 100644 --- a/sqlmesh/core/plan/builder.py +++ b/sqlmesh/core/plan/builder.py @@ -6,7 +6,7 @@ from collections import defaultdict from functools import cached_property -from sqlmesh.core import constants as c + from sqlmesh.core.console import PlanBuilderConsole, get_console from sqlmesh.core.config import ( AutoCategorizationMode, @@ -117,7 +117,6 @@ def __init__( interval_end_per_model: t.Optional[t.Dict[str, int]] = None, console: t.Optional[PlanBuilderConsole] = None, user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None, - environment: str = c.PROD, ): self._context_diff = context_diff self._no_gaps = no_gaps From 751cee90c2b354e204892e5b2c8dfed8443fef31 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 6 Jun 2025 16:57:23 +0300 Subject: [PATCH 9/9] Ensure plan option does not work if target env == create_from --- sqlmesh/core/context_diff.py | 4 +++- tests/core/test_integration.py | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index e9ab24183a..354779a3e1 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -132,7 +132,9 @@ def create( existing_env = state_reader.get_environment(environment) create_from_env_exists = False - if existing_env is None or existing_env.expired or always_recreate_environment: + recreate_environment = always_recreate_environment and not environment == create_from + + if existing_env is None or existing_env.expired or recreate_environment: env = state_reader.get_environment(create_from.lower()) if not env and create_from != c.PROD: diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index af62566971..8725318506 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6345,7 +6345,11 @@ def plan_with_output(ctx: Context, environment: str): in output.stdout ) - # Case 6: Check that we can still run Context::diff() against any environment + # Case 6: Ensure that target environment and create_from environment are not the same + output = plan_with_output(ctx, "prod") + assert not "New environment `prod` will be created from `prod`" in output.stdout + + # Case 7: Check that we can still run Context::diff() against any environment for environment in ["dev", "prod"]: context_diff = ctx._context_diff(environment) assert context_diff.environment == environment