diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 561996594c..a7be1b145b 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -381,6 +381,117 @@ Example showing default values: ) ``` + +### Always comparing against production + +By default, SQLMesh compares the current state of project files to the target `<env>` environment when `sqlmesh plan <env>` is run. However, a common expectation is that local changes should always be compared to the production environment. + +The `always_recreate_environment` boolean plan option can alter this behavior. When enabled, SQLMesh will always attempt to compare against the production environment by recreating the target environment; if `prod` does not exist, SQLMesh will fall back to comparing against the target environment. + +**NOTE:** Upon successful plan application, changes are still promoted to the target `<env>` environment. + +=== "YAML" + + ```yaml linenums="1" + plan: + always_recreate_environment: True + ``` + +=== "Python" + + ```python linenums="1" + from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, + PlanConfig, + ) + + config = Config( + model_defaults=ModelDefaultsConfig(dialect=<dialect>), + plan=PlanConfig( + always_recreate_environment=True, + ), + ) + ``` + +#### Change Categorization Example + +Consider this scenario with `always_recreate_environment` enabled: + +1. Initial state in `prod`: +```sql +MODEL (name sqlmesh_example.test_model, kind FULL); +SELECT 1 AS col +``` + +2. First (breaking) change in `dev`: +```sql +MODEL (name sqlmesh_example__dev.test_model, kind FULL); +SELECT 2 AS col +``` + +??? "Output plan example #1" + + ```bash + New environment `dev` will be created from `prod` + + Differences from the `prod` environment: + + Models: + └── Directly Modified: + └── sqlmesh_example__dev.test_model + + --- + +++ + + + kind FULL + ) + SELECT + - 1 AS col + + 2 AS col + ``` + +3. 
Second (metadata) change in `dev`: +```sql +MODEL (name sqlmesh_example__dev.test_model, kind FULL, owner 'John Doe'); +SELECT 2 AS col +``` + +??? "Output plan example #2" + + ```bash + New environment `dev` will be created from `prod` + + Differences from the `prod` environment: + + Models: + └── Directly Modified: + └── sqlmesh_example__dev.test_model + + --- + + +++ + + @@ -1,8 +1,9 @@ + + MODEL ( + name sqlmesh_example.test_model, + + owner "John Doe", + kind FULL + ) + SELECT + - 1 AS col + + 2 AS col + + Directly Modified: sqlmesh_example__dev.test_model (Breaking) + Models needing backfill: + └── sqlmesh_example__dev.test_model: [full refresh] + ``` + +Even though the second change should have been a metadata change (thus not requiring a backfill), it will still be classified as a breaking change because the comparison is against production instead of the previous development state. This is intentional and may cause additional backfills as more changes are accumulated. + + ### Gateways The `gateways` configuration defines how SQLMesh should connect to the data warehouse, state backend, and scheduler. These options are in the [gateway](../reference/configuration.md#gateway) section of the configuration reference page. diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index 79484b50cc..e00c9dee1f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -80,7 +80,7 @@ Configuration for the `sqlmesh plan` command. 
| `enable_preview` | Indicates whether to enable [data preview](../concepts/plans.md#data-preview) for forward-only models when targeting a development environment (Default: True, except for dbt projects where the target engine does not support cloning) | Boolean | N | | `no_diff` | Don't show diffs for changed models (Default: False) | boolean | N | | `no_prompts` | Disables interactive prompts in CLI (Default: True) | boolean | N | - +| `always_recreate_environment` | Always recreates the target environment from the environment specified in `create_from` (by default `prod`) (Default: False) | boolean | N | ## Run Configuration for the `sqlmesh run` command. Please note that this is only applicable when configured with the [builtin](#builtin) scheduler. diff --git a/sqlmesh/core/config/plan.py b/sqlmesh/core/config/plan.py index cac0b3fd70..df1ca44873 100644 --- a/sqlmesh/core/config/plan.py +++ b/sqlmesh/core/config/plan.py @@ -20,6 +20,7 @@ class PlanConfig(BaseConfig): auto_apply: Whether to automatically apply the new plan after creation. use_finalized_state: Whether to compare against the latest finalized environment state, or to use whatever state the target environment is currently in. + always_recreate_environment: Whether to always recreate the target environment from the `create_from` environment. 
""" forward_only: bool = False @@ -30,3 +31,4 @@ class PlanConfig(BaseConfig): no_prompts: bool = True auto_apply: bool = False use_finalized_state: bool = False + always_recreate_environment: bool = False diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index de5ee6ede9..0450827d6e 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -1487,6 +1487,7 @@ def plan_builder( or (backfill_models is not None and not backfill_models), ensure_finalized_snapshots=self.config.plan.use_finalized_state, diff_rendered=diff_rendered, + always_recreate_environment=self.config.plan.always_recreate_environment, ) modified_model_names = { *context_diff.modified_snapshots, @@ -2628,6 +2629,7 @@ def _context_diff( force_no_diff: bool = False, ensure_finalized_snapshots: bool = False, diff_rendered: bool = False, + always_recreate_environment: bool = False, ) -> ContextDiff: environment = Environment.sanitize_name(environment) if force_no_diff: @@ -2645,6 +2647,7 @@ def _context_diff( environment_statements=self._environment_statements, gateway_managed_virtual_layer=self.config.gateway_managed_virtual_layer, infer_python_dependencies=self.config.infer_python_dependencies, + always_recreate_environment=always_recreate_environment, ) def _destroy(self) -> None: diff --git a/sqlmesh/core/context_diff.py b/sqlmesh/core/context_diff.py index 4212f328b1..354779a3e1 100644 --- a/sqlmesh/core/context_diff.py +++ b/sqlmesh/core/context_diff.py @@ -103,6 +103,7 @@ def create( environment_statements: t.Optional[t.List[EnvironmentStatements]] = [], gateway_managed_virtual_layer: bool = False, infer_python_dependencies: bool = True, + always_recreate_environment: bool = False, ) -> ContextDiff: """Create a ContextDiff object. @@ -128,10 +129,12 @@ def create( The ContextDiff object. 
""" environment = environment.lower() - env = state_reader.get_environment(environment) - + existing_env = state_reader.get_environment(environment) create_from_env_exists = False - if env is None or env.expired: + + recreate_environment = always_recreate_environment and environment != create_from.lower() + + if existing_env is None or existing_env.expired or recreate_environment: env = state_reader.get_environment(create_from.lower()) if not env and create_from != c.PROD: @@ -143,6 +146,7 @@ def create( create_from_env_exists = env is not None previously_promoted_snapshot_ids = set() else: + env = existing_env is_new_environment = False previously_promoted_snapshot_ids = {s.snapshot_id for s in env.promoted_snapshots} @@ -220,6 +224,11 @@ def create( previous_environment_statements = state_reader.get_environment_statements(environment) + if existing_env and always_recreate_environment: + previous_plan_id: t.Optional[str] = existing_env.plan_id + else: + previous_plan_id = env.plan_id if env and not is_new_environment else None + return ContextDiff( environment=environment, is_new_environment=is_new_environment, @@ -232,7 +241,7 @@ def create( modified_snapshots=modified_snapshots, snapshots=merged_snapshots, new_snapshots=new_snapshots, - previous_plan_id=env.plan_id if env and not is_new_environment else None, + previous_plan_id=previous_plan_id, previously_promoted_snapshot_ids=previously_promoted_snapshot_ids, previous_finalized_snapshots=env.previous_finalized_snapshots if env else None, previous_requirements=env.requirements if env else {}, diff --git a/tests/core/test_integration.py b/tests/core/test_integration.py index bfc416596b..8725318506 100644 --- a/tests/core/test_integration.py +++ b/tests/core/test_integration.py @@ -6,7 +6,7 @@ from datetime import timedelta from unittest import mock from unittest.mock import patch - +import logging import os import numpy as np # noqa: TID253 import pandas as pd # noqa: TID253 @@ -37,6 +37,7 @@ from 
sqlmesh.core.console import Console, get_console from sqlmesh.core.context import Context from sqlmesh.core.config.categorizer import CategorizerConfig +from sqlmesh.core.config.plan import PlanConfig from sqlmesh.core.engine_adapter import EngineAdapter from sqlmesh.core.environment import EnvironmentNamingInfo from sqlmesh.core.macros import macro @@ -6252,3 +6253,103 @@ def test_render_path_instead_of_model(tmp_path: Path): # Case 3: Render the model successfully assert ctx.render("test_model").sql() == 'SELECT 1 AS "col"' + + +@use_terminal_console +def test_plan_always_recreate_environment(tmp_path: Path): + def plan_with_output(ctx: Context, environment: str): + with patch.object(logger, "info") as mock_logger: + with capture_output() as output: + ctx.load() + ctx.plan(environment, no_prompts=True, auto_apply=True) + + # Facade logs info "Promoting environment {environment}" + assert mock_logger.call_args[0][1] == environment + + return output + + models_dir = tmp_path / "models" + + logger = logging.getLogger("sqlmesh.core.state_sync.db.facade") + + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 1 AS col" + ) + + config = Config(plan=PlanConfig(always_recreate_environment=True)) + ctx = Context(paths=[tmp_path], config=config) + + # Case 1: Neither prod nor dev exists, so dev is initialized + output = plan_with_output(ctx, "dev") + + assert """`dev` environment will be initialized""" in output.stdout + + # Case 2: Prod does not exist, so dev is updated + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 5 AS col" + ) + + output = plan_with_output(ctx, "dev") + assert "`dev` environment will be initialized" in output.stdout + + # Case 3: Prod is initialized, so plan comparisons moving forward should be against prod + output = plan_with_output(ctx, "prod") + assert "`prod` environment will be initialized" in output.stdout + + # Case 4: Dev is updated with a breaking change. 
Prod exists now so plan comparisons moving forward should be against prod + create_temp_file( + tmp_path, models_dir / "a.sql", "MODEL (name test.a, kind FULL); SELECT 10 AS col" + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout + assert "Differences from the `prod` environment" in output.stdout + + # Case 5: Dev is updated with a metadata change, but comparison against prod shows both the previous and the current changes + # so it's still classified as a breaking change + create_temp_file( + tmp_path, + models_dir / "a.sql", + "MODEL (name test.a, kind FULL, owner 'test'); SELECT 10 AS col", + ) + ctx.load() + + plan = ctx.plan_builder("dev").build() + + assert ( + next(iter(plan.context_diff.snapshots.values())).change_category + == SnapshotChangeCategory.BREAKING + ) + + output = plan_with_output(ctx, "dev") + assert "New environment `dev` will be created from `prod`" in output.stdout + assert "Differences from the `prod` environment" in output.stdout + + assert ( + """MODEL ( + name test.a, ++ owner test, + kind FULL + ) + SELECT +- 5 AS col ++ 10 AS col""" + in output.stdout + ) + + # Case 6: Ensure that target environment and create_from environment are not the same + output = plan_with_output(ctx, "prod") + assert "New environment `prod` will be created from `prod`" not in output.stdout + + # Case 7: Check that we can still run Context::diff() against any environment + for environment in ["dev", "prod"]: + context_diff = ctx._context_diff(environment) + assert context_diff.environment == environment