Skip to content

Commit 6ececb1

Browse files
committed
test(integration): live binding validator end-to-end against real BigQuery (#105 PR 2a)
Adds TestBindingValidationLive::test_validator_end_to_end_against_real_bigquery to tests/test_integration_ontology_binding.py, gated on RUN_LIVE_BIGQUERY_TESTS=1 alongside the other live tests in this module. Self-contained: uses its own per-test scratch dataset (function-scoped fixtures, not the module-scoped ones the rest of the file shares) because phase 4 of the test deliberately drops a column via ALTER TABLE. Running destructive SQL against the shared dataset would interfere with other tests in this file. Phases: 1. Materialize real tables via OntologyMaterializer (executes real CREATE TABLE IF NOT EXISTS for entity + relationship tables, including SDK metadata columns). 2. Default-mode validation: report.ok must be True; warnings contain only KEY_COLUMN_NULLABLE entries (because CREATE TABLE IF NOT EXISTS emits NULLABLE keys without NOT NULL constraints). 3. Strict-mode validation: same input must surface those four warnings as KEY_COLUMN_NULLABLE failures, with warnings empty (escalated, not duplicated). 4. Drop the 'confidence' column via real ALTER TABLE; default-mode re-validation must emit exactly one MISSING_COLUMN failure pointing at binding.entities[0].properties[1].column (binding YAML order: decision_id at [0], confidence at [1]). Verified live against test-project-0728-467323 (raincoatrun@): PASSED in 13.28s. Skipped automatically without RUN_LIVE_BIGQUERY_TESTS=1.
1 parent 29e016e commit 6ececb1

1 file changed

Lines changed: 213 additions & 0 deletions

File tree

tests/test_integration_ontology_binding.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,3 +614,216 @@ def test_synthetic_lineage_query(
614614
)
615615
rows = list(job.result())
616616
assert len(rows) > 0, "Lineage GQL returned 0 rows"
617+
618+
619+
# ------------------------------------------------------------------ #
620+
# Binding-validator live test (issue #105 PR 2a) #
621+
# ------------------------------------------------------------------ #
622+
623+
624+
class TestBindingValidationLive:
    """Exercise ``validate_binding_against_bigquery`` against real BigQuery.

    Self-contained: uses its own per-test scratch dataset (rather
    than the module-scoped fixture) because the fourth phase of this
    test deliberately drops a column via ALTER TABLE, and running
    destructive SQL against a shared dataset would interfere with
    other tests in this file.

    Phases:
      1. Materialize real tables via OntologyMaterializer.
      2. Default-mode validation: report.ok must be True; warnings
         contain only KEY_COLUMN_NULLABLE entries (because the SDK's
         CREATE TABLE IF NOT EXISTS emits NULLABLE keys).
      3. Strict-mode validation: same input must surface those
         warnings as KEY_COLUMN_NULLABLE failures, with warnings
         empty (escalated, not duplicated).
      4. Drop the 'confidence' column via real ALTER TABLE; default-
         mode re-validation must emit exactly one MISSING_COLUMN
         failure pointing at the dropped column.
    """

    @pytest.fixture(scope="function")
    def isolated_scratch(self):
        """Yield ``(client, dataset_id)`` for a fresh per-test dataset.

        The dataset name carries a random 8-hex-digit suffix so each
        test invocation gets its own dataset. On teardown the dataset
        is deleted together with its contents and the client is
        closed, regardless of the test outcome.
        """
        from google.cloud import bigquery

        run_id = uuid.uuid4().hex[:8]
        ds_id = f"bind_validate_live_{run_id}"
        client = bigquery.Client(project=_PROJECT, location=_LOCATION)
        try:
            ds = bigquery.Dataset(f"{_PROJECT}.{ds_id}")
            ds.location = _LOCATION
            # 3,600,000 ms = 1 hour default table expiration; presumably
            # a backstop in case teardown never runs.
            ds.default_table_expiration_ms = 3600000
            client.create_dataset(ds, exists_ok=True)
            try:
                yield client, ds_id
            finally:
                client.delete_dataset(
                    f"{_PROJECT}.{ds_id}",
                    delete_contents=True,
                    not_found_ok=True,
                )
        finally:
            # Release the client's transport even if dataset creation
            # or deletion raised.
            client.close()

    @pytest.fixture(scope="function")
    def isolated_ontology_and_binding(self, isolated_scratch, tmp_path_factory):
        """Return ``(ontology, binding)`` targeting the isolated dataset.

        Writes a small ontology (Decision, Outcome, HasOutcome) and a
        matching binding to temporary YAML files, then loads both with
        the ``bigquery_ontology`` loaders. The binding's target dataset
        is the one created by ``isolated_scratch``.
        """
        from bigquery_ontology import load_binding
        from bigquery_ontology import load_ontology

        _, ds_id = isolated_scratch
        tmp = tmp_path_factory.mktemp("bind_validate_live")

        # Property order matters: the test asserts on the binding path
        # ``entities[0].properties[1]`` (Decision -> confidence).
        ont_path = tmp / "ontology.yaml"
        ont_path.write_text(
            "ontology: BindValidatorLive\n"
            "entities:\n"
            "  - name: Decision\n"
            "    keys:\n"
            "      primary: [decision_id]\n"
            "    properties:\n"
            "      - name: decision_id\n"
            "        type: string\n"
            "      - name: confidence\n"
            "        type: double\n"
            "  - name: Outcome\n"
            "    keys:\n"
            "      primary: [outcome_id]\n"
            "    properties:\n"
            "      - name: outcome_id\n"
            "        type: string\n"
            "relationships:\n"
            "  - name: HasOutcome\n"
            "    from: Decision\n"
            "    to: Outcome\n"
            "    properties:\n"
            "      - name: weight\n"
            "        type: double\n",
            encoding="utf-8",
        )

        # Only the two target lines interpolate values, so only they
        # need to be f-strings.
        bnd_path = tmp / "binding.yaml"
        bnd_path.write_text(
            "binding: live_check\n"
            "ontology: BindValidatorLive\n"
            "target:\n"
            "  backend: bigquery\n"
            f"  project: {_PROJECT}\n"
            f"  dataset: {ds_id}\n"
            "entities:\n"
            "  - name: Decision\n"
            "    source: decisions\n"
            "    properties:\n"
            "      - name: decision_id\n"
            "        column: decision_id\n"
            "      - name: confidence\n"
            "        column: confidence\n"
            "  - name: Outcome\n"
            "    source: outcomes\n"
            "    properties:\n"
            "      - name: outcome_id\n"
            "        column: outcome_id\n"
            "relationships:\n"
            "  - name: HasOutcome\n"
            "    source: edges\n"
            "    from_columns: [decision_id]\n"
            "    to_columns: [outcome_id]\n"
            "    properties:\n"
            "      - name: weight\n"
            "        column: weight\n",
            encoding="utf-8",
        )

        ontology = load_ontology(str(ont_path))
        binding = load_binding(str(bnd_path), ontology=ontology)
        return ontology, binding

    def test_validator_end_to_end_against_real_bigquery(
        self, isolated_scratch, isolated_ontology_and_binding
    ):
        """Run the four validation phases against the scratch dataset."""
        from bigquery_agent_analytics.binding_validation import FailureCode
        from bigquery_agent_analytics.binding_validation import (
            validate_binding_against_bigquery,
        )
        from bigquery_agent_analytics.ontology_materializer import (
            OntologyMaterializer,
        )

        client, ds_id = isolated_scratch
        ontology, binding = isolated_ontology_and_binding

        # Phase 1: materialize real tables.
        mat = OntologyMaterializer.from_ontology_binding(
            ontology=ontology,
            binding=binding,
            lineage_config=None,
            write_mode="batch_load",
        )
        tables = mat.create_tables()
        assert set(tables.keys()) == {
            "Decision",
            "Outcome",
            "HasOutcome",
        }, f"Unexpected tables created: {sorted(tables.keys())}"

        # Phase 2: default-mode validation. SDK-created tables must
        # validate clean; the only signal is advisory warnings on
        # NULLABLE keys.
        default_report = validate_binding_against_bigquery(
            ontology=ontology, binding=binding, bq_client=client
        )
        assert default_report.ok is True, (
            "Default mode rejected SDK-created tables. Failures: "
            f"{[(f.code, f.detail) for f in default_report.failures]}"
        )
        assert all(
            w.code == FailureCode.KEY_COLUMN_NULLABLE
            for w in default_report.warnings
        ), (
            "Only KEY_COLUMN_NULLABLE warnings expected against SDK-"
            "created tables. Got: "
            f"{[w.code for w in default_report.warnings]}"
        )
        # Decision.decision_id, Outcome.outcome_id (entity primary keys)
        # plus HasOutcome.from_columns[0]=decision_id and
        # HasOutcome.to_columns[0]=outcome_id (relationship endpoints).
        assert len(default_report.warnings) == 4

        # Phase 3: strict-mode escalation.
        strict_report = validate_binding_against_bigquery(
            ontology=ontology,
            binding=binding,
            bq_client=client,
            strict=True,
        )
        assert (
            strict_report.ok is False
        ), "Strict mode should reject NULLABLE primary-key columns"
        assert all(
            f.code == FailureCode.KEY_COLUMN_NULLABLE
            for f in strict_report.failures
        )
        assert len(strict_report.failures) == 4
        assert strict_report.warnings == (), (
            "Strict mode must escalate warnings into failures, not "
            "double-emit them"
        )

        # Phase 4: drop a non-key property column via real ALTER TABLE
        # and assert the validator catches the resulting drift.
        table_ref = f"{_PROJECT}.{ds_id}.decisions"
        client.query(
            f"ALTER TABLE `{table_ref}` DROP COLUMN confidence"
        ).result()

        broken_report = validate_binding_against_bigquery(
            ontology=ontology, binding=binding, bq_client=client
        )
        # A missing bound column is a failure, so the report as a whole
        # must be rejected even in default mode.
        assert (
            broken_report.ok is False
        ), "Default mode should reject a table whose bound column was dropped"
        miss = [
            f
            for f in broken_report.failures
            if f.code == FailureCode.MISSING_COLUMN
            and "confidence" in f.bq_ref
        ]
        assert len(miss) == 1, (
            "Expected exactly 1 MISSING_COLUMN for confidence, got "
            "failures: "
            f"{[(f.code, f.bq_ref) for f in broken_report.failures]}"
        )
        # Path must reflect binding YAML order. Decision's binding lists
        # decision_id at properties[0] and confidence at properties[1].
        assert miss[0].binding_path == "binding.entities[0].properties[1].column"
        assert miss[0].bq_ref == f"{table_ref}.confidence"

0 commit comments

Comments
 (0)