@@ -614,3 +614,216 @@ def test_synthetic_lineage_query(
614614 )
615615 rows = list (job .result ())
616616 assert len (rows ) > 0 , "Lineage GQL returned 0 rows"
617+
618+
619+ # ------------------------------------------------------------------ #
620+ # Binding-validator live test (issue #105 PR 2a) #
621+ # ------------------------------------------------------------------ #
622+
623+
class TestBindingValidationLive:
    """Check ``validate_binding_against_bigquery`` against real BigQuery.

    Self-contained: uses its own per-test scratch dataset (rather
    than the module-scoped fixture) because the final phase of this
    test (phase 4) deliberately drops a column via ALTER TABLE, and
    running destructive SQL against a shared dataset would interfere
    with other tests in this file.

    Phases:
      1. Materialize real tables via OntologyMaterializer.
      2. Default-mode validation: report.ok must be True; warnings
         contain only KEY_COLUMN_NULLABLE entries (because the SDK's
         CREATE TABLE IF NOT EXISTS emits NULLABLE keys).
      3. Strict-mode validation: same input must surface those
         warnings as KEY_COLUMN_NULLABLE failures, with warnings
         empty (escalated, not duplicated).
      4. Drop the 'confidence' column via real ALTER TABLE; default-
         mode re-validation must emit exactly one MISSING_COLUMN
         failure pointing at the dropped column.
    """

    @pytest.fixture(scope="function")
    def isolated_scratch(self):
        """Yield ``(client, dataset_id)`` for a fresh per-test dataset.

        On teardown the dataset is deleted together with its contents
        and the client is closed, whether or not the test passed.
        """
        from google.cloud import bigquery

        # Short random suffix so each test invocation gets its own dataset.
        run_id = uuid.uuid4().hex[:8]
        ds_id = f"bind_validate_live_{run_id}"
        client = bigquery.Client(project=_PROJECT, location=_LOCATION)
        try:
            ds = bigquery.Dataset(f"{_PROJECT}.{ds_id}")
            ds.location = _LOCATION
            # One hour, in milliseconds; presumably a safety net so that
            # tables expire even if teardown never gets to run.
            ds.default_table_expiration_ms = 3600000
            client.create_dataset(ds, exists_ok=True)
            try:
                yield client, ds_id
            finally:
                client.delete_dataset(
                    f"{_PROJECT}.{ds_id}",
                    delete_contents=True,
                    not_found_ok=True,
                )
        finally:
            # Release the client's underlying transport even when dataset
            # creation or deletion raised.
            client.close()

    @pytest.fixture(scope="function")
    def isolated_ontology_and_binding(self, isolated_scratch, tmp_path_factory):
        """Return ``(ontology, binding)`` targeting the isolated scratch.

        Writes a two-entity / one-relationship ontology YAML and a
        matching binding YAML into a fresh temp directory, then loads
        both through ``bigquery_ontology``.
        """
        from bigquery_ontology import load_binding
        from bigquery_ontology import load_ontology

        _, ds_id = isolated_scratch
        tmp = tmp_path_factory.mktemp("bind_validate_live")

        ont_path = tmp / "ontology.yaml"
        ont_path.write_text(
            "ontology: BindValidatorLive\n"
            "entities:\n"
            "  - name: Decision\n"
            "    keys:\n"
            "      primary: [decision_id]\n"
            "    properties:\n"
            "      - name: decision_id\n"
            "        type: string\n"
            "      - name: confidence\n"
            "        type: double\n"
            "  - name: Outcome\n"
            "    keys:\n"
            "      primary: [outcome_id]\n"
            "    properties:\n"
            "      - name: outcome_id\n"
            "        type: string\n"
            "relationships:\n"
            "  - name: HasOutcome\n"
            "    from: Decision\n"
            "    to: Outcome\n"
            "    properties:\n"
            "      - name: weight\n"
            "        type: double\n",
            encoding="utf-8",
        )

        # Only the project and dataset lines are interpolated; the rest of
        # the binding document is static text.
        bnd_path = tmp / "binding.yaml"
        bnd_path.write_text(
            "binding: live_check\n"
            "ontology: BindValidatorLive\n"
            "target:\n"
            "  backend: bigquery\n"
            f"  project: {_PROJECT}\n"
            f"  dataset: {ds_id}\n"
            "entities:\n"
            "  - name: Decision\n"
            "    source: decisions\n"
            "    properties:\n"
            "      - name: decision_id\n"
            "        column: decision_id\n"
            "      - name: confidence\n"
            "        column: confidence\n"
            "  - name: Outcome\n"
            "    source: outcomes\n"
            "    properties:\n"
            "      - name: outcome_id\n"
            "        column: outcome_id\n"
            "relationships:\n"
            "  - name: HasOutcome\n"
            "    source: edges\n"
            "    from_columns: [decision_id]\n"
            "    to_columns: [outcome_id]\n"
            "    properties:\n"
            "      - name: weight\n"
            "        column: weight\n",
            encoding="utf-8",
        )

        ontology = load_ontology(str(ont_path))
        binding = load_binding(str(bnd_path), ontology=ontology)
        return ontology, binding

    def test_validator_end_to_end_against_real_bigquery(
        self, isolated_scratch, isolated_ontology_and_binding
    ):
        """Run the four validation phases described on the class."""
        from bigquery_agent_analytics.binding_validation import FailureCode
        from bigquery_agent_analytics.binding_validation import validate_binding_against_bigquery
        from bigquery_agent_analytics.ontology_materializer import OntologyMaterializer

        client, ds_id = isolated_scratch
        ontology, binding = isolated_ontology_and_binding

        # Phase 1: materialize real tables.
        mat = OntologyMaterializer.from_ontology_binding(
            ontology=ontology,
            binding=binding,
            lineage_config=None,
            write_mode="batch_load",
        )
        tables = mat.create_tables()
        assert set(tables.keys()) == {
            "Decision",
            "Outcome",
            "HasOutcome",
        }, f"Unexpected tables created: {sorted(tables.keys())}"

        # Phase 2: default-mode validation. SDK-created tables must
        # validate clean; the only signal is advisory warnings on
        # NULLABLE keys.
        default_report = validate_binding_against_bigquery(
            ontology=ontology, binding=binding, bq_client=client
        )
        assert default_report.ok is True, (
            "Default mode rejected SDK-created tables. Failures: "
            f"{[(f.code, f.detail) for f in default_report.failures]}"
        )
        assert all(
            w.code == FailureCode.KEY_COLUMN_NULLABLE
            for w in default_report.warnings
        ), (
            "Only KEY_COLUMN_NULLABLE warnings expected against SDK-"
            "created tables. Got: "
            f"{[w.code for w in default_report.warnings]}"
        )
        # Decision.decision_id, Outcome.outcome_id (entity primary keys)
        # plus HasOutcome.from_columns[0]=decision_id and
        # HasOutcome.to_columns[0]=outcome_id (relationship endpoints).
        assert len(default_report.warnings) == 4

        # Phase 3: strict-mode escalation.
        strict_report = validate_binding_against_bigquery(
            ontology=ontology,
            binding=binding,
            bq_client=client,
            strict=True,
        )
        assert (
            strict_report.ok is False
        ), "Strict mode should reject NULLABLE primary-key columns"
        assert all(
            f.code == FailureCode.KEY_COLUMN_NULLABLE
            for f in strict_report.failures
        )
        assert len(strict_report.failures) == 4
        assert strict_report.warnings == (), (
            "Strict mode must escalate warnings into failures, not "
            "double-emit them"
        )

        # Phase 4: drop a non-key property column via real ALTER TABLE
        # and assert the validator catches the resulting drift.
        table_ref = f"{_PROJECT}.{ds_id}.decisions"
        client.query(f"ALTER TABLE `{table_ref}` DROP COLUMN confidence").result()

        broken_report = validate_binding_against_bigquery(
            ontology=ontology, binding=binding, bq_client=client
        )
        miss = [
            f
            for f in broken_report.failures
            if f.code == FailureCode.MISSING_COLUMN and "confidence" in f.bq_ref
        ]
        assert len(miss) == 1, (
            "Expected exactly 1 MISSING_COLUMN for confidence, got "
            "failures: "
            f"{[(f.code, f.bq_ref) for f in broken_report.failures]}"
        )
        # Path must reflect binding YAML order. Decision's binding lists
        # decision_id at properties[0] and confidence at properties[1].
        assert miss[0].binding_path == "binding.entities[0].properties[1].column"
        assert miss[0].bq_ref == f"{table_ref}.confidence"
0 commit comments