From 8605075902c7d8afc10bcf1a308d9a76a28bd2a2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 22:17:15 +0000 Subject: [PATCH 1/6] refactor: deprecate and clean up multimodal blob APIs --- .../bigframes/bigquery/_operations/ai.py | 4 +- .../bigframes/bigframes/blob/_functions.py | 602 ---------- packages/bigframes/bigframes/dataframe.py | 53 +- packages/bigframes/bigframes/ml/llm.py | 4 +- .../bigframes/bigframes/operations/blob.py | 1029 +---------------- .../bigframes/bigframes/operations/strings.py | 29 +- packages/bigframes/bigframes/series.py | 77 +- .../bigframes/bigframes/session/__init__.py | 42 +- .../tests/system/large/blob/test_function.py | 853 -------------- .../sqlglot/expressions/test_blob_ops.py | 12 +- 10 files changed, 130 insertions(+), 2575 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/blob/test_function.py diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 7a509d4f95ff..6164c863b391 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -1003,7 +1003,7 @@ def _separate_context_and_series( if isinstance(prompt, series.Series): if prompt.dtype == dtypes.OBJ_REF_DTYPE: # Multi-model support - return [None], [prompt.blob.read_url()] + return [None], [prompt._blob._read_url()] return [None], [prompt] prompt_context: List[str | None] = [] @@ -1040,7 +1040,7 @@ def _convert_series( if result.dtype == dtypes.OBJ_REF_DTYPE: # Support multimodel - return result.blob.read_url() + return result._blob._read_url() return result diff --git a/packages/bigframes/bigframes/blob/_functions.py b/packages/bigframes/bigframes/blob/_functions.py index 5114f60058c1..3869416d1244 100644 --- a/packages/bigframes/bigframes/blob/_functions.py +++ b/packages/bigframes/bigframes/blob/_functions.py @@ -124,605 +124,3 @@ def udf(self): # TODO(b/404605969): remove cleanups when UDF fixes dataset deletion. self._session._function_session._update_temp_artifacts(udf_name, "") return self._session.read_gbq_function(udf_name) - - -def exif_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from PIL import ExifTags, Image - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - image = Image.open(io.BytesIO(bts)) - exif_data = image.getexif() - exif_dict = {} - - if exif_data: - for tag, value in exif_data.items(): - tag_name = ExifTags.TAGS.get(tag, tag) - # Convert non-serializable types to strings - try: - json.dumps(value) - exif_dict[tag_name] = value - except (TypeError, ValueError): - exif_dict[tag_name] = str(value) - - if verbose: - return json.dumps({"status": "", "content": json.dumps(exif_dict)}) - else: - return json.dumps(exif_dict) - - except Exception as e: - # Return error as JSON with error field - error_result = {"status": f"{type(e).__name__}: {str(e)}", "content": "{}"} - if verbose: - return json.dumps(error_result) - else: - return "{}" - - -exif_func_def = FunctionDef(exif_func, ["pillow", "requests"]) - - -# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string. -def image_blur_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - ksize_x: int, - ksize_y: int, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() # Raise exception for HTTP errors - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={"Content-Type": content_type}, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"]) - - -def image_blur_to_bytes_func( - src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str, verbose: bool -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_blur_to_bytes_def = FunctionDef( - image_blur_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_resize_def = FunctionDef( - image_resize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_to_bytes_func( - src_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_resize_to_bytes_def = FunctionDef( - image_resize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_normalize_def = FunctionDef( - image_normalize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_to_bytes_func( - src_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_normalize_to_bytes_def = FunctionDef( - image_normalize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -# Extracts all text from a PDF url -def pdf_extract_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - - all_text = "" - for page in reader.pages: - page_extract_text = page.extract_text() - if page_extract_text: - all_text += page_extract_text - - result_dict = {"status": "", "content": all_text} - - except Exception as e: - result_dict = {"status": str(e), "content": ""} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -pdf_extract_def = FunctionDef( - pdf_extract_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"] -) - - -# Extracts text from a PDF url and chunks it simultaneously -def pdf_chunk_func( - src_obj_ref_rt: str, chunk_size: int, overlap_size: int, verbose: bool -) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - # extract and chunk text simultaneously - all_text_chunks = [] - curr_chunk = "" - for page in reader.pages: - page_text = page.extract_text() - if page_text: - curr_chunk += page_text - # split the accumulated text into chunks of a specific size with overlaop - # this loop implements a sliding window approach to create chunks - while len(curr_chunk) >= chunk_size: - split_idx = curr_chunk.rfind(" ", 0, chunk_size) - if split_idx == -1: - split_idx = chunk_size - actual_chunk = curr_chunk[:split_idx] - all_text_chunks.append(actual_chunk) - overlap = curr_chunk[split_idx + 1 : split_idx + 1 + overlap_size] - curr_chunk = overlap + curr_chunk[split_idx + 1 + overlap_size :] - if curr_chunk: - all_text_chunks.append(curr_chunk) - - result_dict = {"status": "", "content": all_text_chunks} - - except Exception as e: - result_dict = {"status": str(e), "content": []} - - if verbose: - return json.dumps(result_dict) - else: - return json.dumps(result_dict["content"]) - - -pdf_chunk_def = FunctionDef( - pdf_chunk_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"] -) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index b89360c691d3..a98a44448737 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -833,7 +833,7 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: df = self.copy() for col in blob_cols: # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + df[col] = df[col]._blob._get_runtime(mode="R", with_metadata=True) return df, blob_cols def _repr_mimebundle_(self, include=None, exclude=None): @@ -1611,7 +1611,8 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: ... + ) -> pandas.DataFrame: + ... @overload def to_pandas( @@ -1623,7 +1624,8 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def to_pandas( self, @@ -1935,7 +1937,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def drop( @@ -1947,7 +1950,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: ... + ) -> None: + ... def drop( self, @@ -2091,17 +2095,20 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: ... + ) -> None: + ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2118,7 +2125,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2127,7 +2135,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2136,7 +2145,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename_axis( self, @@ -2332,7 +2342,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def reset_index( @@ -2344,7 +2355,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: ... + ) -> None: + ... @overload def reset_index( @@ -2356,7 +2368,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: ... + ) -> Optional[DataFrame]: + ... def reset_index( self, @@ -2419,7 +2432,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_index( @@ -2428,7 +2442,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_index( self, @@ -2474,7 +2489,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_values( @@ -2485,7 +2501,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_values( self, diff --git a/packages/bigframes/bigframes/ml/llm.py b/packages/bigframes/bigframes/ml/llm.py index bcf59d591f8e..d9e228c90c9f 100644 --- a/packages/bigframes/bigframes/ml/llm.py +++ b/packages/bigframes/bigframes/ml/llm.py @@ -397,7 +397,7 @@ def predict( # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input if X["content"].dtype == dtypes.OBJ_REF_DTYPE: - X["content"] = X["content"].blob._get_runtime("R", with_metadata=True) + X["content"] = X["content"]._blob._get_runtime("R", with_metadata=True) options: dict = {} @@ -731,7 +731,7 @@ def predict( isinstance(item, bigframes.series.Series) and item.dtype == dtypes.OBJ_REF_DTYPE ): - item = item.blob._get_runtime("R", with_metadata=True) + item = item._blob._get_runtime("R", with_metadata=True) df_prompt[label] = item df_prompt = df_prompt.drop(columns="bigframes_placeholder_col") diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index b9a33af2d1ed..d29d1a1202c0 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -33,134 +33,17 @@ @log_adapter.class_logger -class BlobAccessor: +class _BlobAccessor: """ - Blob functions for Series and Index. - - .. note:: - BigFrames Blob is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). + Internal blob functions for Series and Index. """ def __init__(self, data: bigframes.series.Series): self._data = data - def uri(self) -> bigframes.series.Series: - """URIs of the Blob. - - Returns: - bigframes.series.Series: URIs as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("uri") - - def authorizer(self) -> bigframes.series.Series: - """Authorizers of the Blob. - - Returns: - bigframes.series.Series: Autorithers(connection) as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("authorizer") - - def version(self) -> bigframes.series.Series: - """Versions of the Blob. - - Returns: - bigframes.series.Series: Version as string.""" - # version must be retrieved after fetching metadata - return self._data._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( - "version" - ) - - def metadata(self) -> bigframes.series.Series: - """Retrieve the metadata of the Blob. - - Returns: - bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time). - """ - series_to_check = bigframes.series.Series(self._data._block) - # Check if it's a struct series from a verbose operation - if dtypes.is_struct_like(series_to_check.dtype): - pyarrow_dtype = series_to_check.dtype.pyarrow_dtype - if "content" in [field.name for field in pyarrow_dtype]: - content_field_type = pyarrow_dtype.field("content").type - content_bf_type = dtypes.arrow_dtype_to_bigframes_dtype( - content_field_type - ) - if content_bf_type == dtypes.OBJ_REF_DTYPE: - series_to_check = series_to_check.struct.field("content") - details_json = series_to_check._apply_unary_op( - ops.obj_fetch_metadata_op - ).struct.field("details") - import bigframes.bigquery as bbq - - return bbq.json_extract(details_json, "$.gcs_metadata").rename("metadata") - - def content_type(self) -> bigframes.series.Series: - """Retrieve the content type of the Blob. - - Returns: - bigframes.series.Series: string of the content type.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.content_type")) - .rename("content_type") - ) - - def md5_hash(self) -> bigframes.series.Series: - """Retrieve the md5 hash of the Blob. - - Returns: - bigframes.series.Series: string of the md5 hash.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.md5_hash")) - .rename("md5_hash") - ) - - def size(self) -> bigframes.series.Series: - """Retrieve the file size of the Blob. - - Returns: - bigframes.series.Series: file size in bytes.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.size")) - .rename("size") - .astype("Int64") - ) - - def updated(self) -> bigframes.series.Series: - """Retrieve the updated time of the Blob. - - Returns: - bigframes.series.Series: updated time as UTC datetime.""" - import bigframes.pandas as bpd - - updated = ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.updated")) - .rename("updated") - .astype("Int64") - ) - - return bpd.to_datetime(updated, unit="us", utc=True) - def _get_runtime( self, mode: str, with_metadata: bool = False ) -> bigframes.series.Series: - """Retrieve the ObjectRefRuntime as JSON. - - Args: - mode (str): mode for the URLs, "R" for read, "RW" for read & write. - metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime. - - Returns: - bigframes.series.Series: ObjectRefRuntime JSON. - """ s = ( self._data._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata @@ -169,913 +52,7 @@ def _get_runtime( return s._apply_unary_op(ops.ObjGetAccessUrl(mode=mode)) - def _df_apply_udf( - self, df: bigframes.dataframe.DataFrame, udf - ) -> bigframes.series.Series: - # Catch and rethrow function axis=1 warning to be more user-friendly. - with warnings.catch_warnings(record=True) as catched_warnings: - s = df.apply(udf, axis=1) - for w in catched_warnings: - if isinstance(w.message, bfe.FunctionAxisOnePreviewWarning): - warnings.warn( - "Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.", - category=w.category, - stacklevel=2, - ) - else: - warnings.warn_explicit( - message=w.message, - category=w.category, - filename=w.filename, - lineno=w.lineno, - source=w.source, - ) - - return s - - def _apply_udf_or_raise_error( - self, df: bigframes.dataframe.DataFrame, udf, operation_name: str - ) -> bigframes.series.Series: - """Helper to apply UDF with consistent error handling.""" - try: - res = self._df_apply_udf(df, udf) - except Exception as e: - raise RuntimeError(f"{operation_name} UDF execution failed: {e}") from e - - if res is None: - raise RuntimeError(f"{operation_name} returned None result") - - return res - - def read_url(self) -> bigframes.series.Series: - """Retrieve the read URL of the Blob. - - Returns: - bigframes.series.Series: Read only URLs.""" + def _read_url(self) -> bigframes.series.Series: return self._get_runtime(mode="R")._apply_unary_op( ops.JSONValue(json_path="$.access_urls.read_url") ) - - def write_url(self) -> bigframes.series.Series: - """Retrieve the write URL of the Blob. - - Returns: - bigframes.series.Series: Writable URLs.""" - return self._get_runtime(mode="RW")._apply_unary_op( - ops.JSONValue(json_path="$.access_urls.write_url") - ) - - def display( - self, - n: int = 3, - *, - content_type: str = "", - width: Optional[int] = None, - height: Optional[int] = None, - ): - """Display the blob content in the IPython Notebook environment. Only works for image type now. - - Args: - n (int, default 3): number of sample blob objects to display. - content_type (str, default ""): content type of the blob. If unset, use the blob metadata of the storage. Possible values are "image", "audio" and "video". - width (int or None, default None): width in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_width, otherwise image/video's original size or ratio is used. No-op for other content types. - height (int or None, default None): height in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_height, otherwise image/video's original size or ratio is used. No-op for other content types. - """ - import IPython.display as ipy_display - - width = width or bigframes.options.display.blob_display_width - height = height or bigframes.options.display.blob_display_height - - # col name doesn't matter here. Rename to avoid column name conflicts - df = bigframes.series.Series(self._data._block).rename("blob_col").to_frame() - - df["read_url"] = df["blob_col"].blob.read_url() - - if content_type: - df["content_type"] = content_type - else: - df["content_type"] = df["blob_col"].blob.content_type() - - pandas_df, _, query_job = df._block.retrieve_repr_request_results(n) - df._set_internal_query_job(query_job) - - def display_single_url( - read_url: Union[str, pd._libs.missing.NAType], - content_type: Union[str, pd._libs.missing.NAType], - ): - if pd.isna(read_url): - ipy_display.display("") - return - - if pd.isna(content_type): # display as raw data or error - response = requests.get(read_url) - ipy_display.display(response.content) - return - - content_type = cast(str, content_type).casefold() - - if content_type.startswith("image"): - ipy_display.display( - ipy_display.Image(url=read_url, width=width, height=height) - ) - elif content_type.startswith("audio"): - # using url somehow doesn't work with audios - response = requests.get(read_url) - ipy_display.display(ipy_display.Audio(response.content)) - elif content_type.startswith("video"): - ipy_display.display( - ipy_display.Video(read_url, width=width, height=height) - ) - else: # display as raw data - response = requests.get(read_url) - ipy_display.display(response.content) - - for _, row in pandas_df.iterrows(): - display_single_url(row["read_url"], row["content_type"]) - - @property - def session(self): - return self._data._block.session - - def _resolve_connection(self, connection: Optional[str] = None) -> str: - """Resovle the BigQuery connection. - - Args: - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" is - str. If None, uses default connection of the session. - - Returns: - str: the resolved BigQuery connection string in the format: - "project.location.connection_id". - - Raises: - ValueError: If the connection cannot be resolved to a valid string. - """ - connection = connection or self._data._block.session.bq_connection - return clients.get_canonical_bq_connection_id( - connection, - default_project=self._data._block.session._project, - default_location=self._data._block.session._location, - ) - - def get_runtime_json_str( - self, mode: str = "R", *, with_metadata: bool = False - ) -> bigframes.series.Series: - """Get the runtime (contains signed URL to access gcs data) and apply the ToJSONSTring transformation. - - Args: - mode(str or str, default "R"): the mode for accessing the runtime. - Default to "R". Possible values are "R" (read-only) and - "RW" (read-write) - with_metadata (bool, default False): whether to include metadata - in the JSON string. Default to False. - - Returns: - str: the runtime object in the JSON string. - """ - runtime = self._get_runtime(mode=mode, with_metadata=with_metadata) - return runtime._apply_unary_op(ops.ToJSONString()) - - def exif( - self, - *, - engine: Literal[None, "pillow"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extract EXIF data. Now only support image types. - - Args: - engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: JSON series of key-value pairs if verbose=False, or struct with status and content if verbose=True. - - Raises: - ValueError: If engine is not 'pillow'. - RuntimeError: If EXIF extraction fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pillow": - raise ValueError("Must specify the engine, supported value is 'pillow'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - exif_udf = blob_func.TransformFunction( - blob_func.exif_func_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - res = self._apply_udf_or_raise_error(df, exif_udf, "EXIF extraction") - - if verbose: - try: - exif_content_series = bbq.parse_json( - res._apply_unary_op(ops.JSONValue(json_path="$.content")) - ).rename("exif_content") - exif_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - results_df = bpd.DataFrame( - {"status": exif_status_series, "content": exif_content_series} - ) - results_struct = bbq.struct(results_df).rename("exif_results") - return results_struct - else: - try: - return bbq.parse_json(res) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - - def image_blur( - self, - ksize: tuple[int, int], - *, - engine: Literal[None, "opencv"] = None, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Blurs images. - - Args: - ksize (tuple(int, int)): Kernel size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image blur operation fails. - """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - - if verbose: - blurred_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - blurred_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[blurred_content_b64_series] - ) - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": blurred_status_series, "content": blurred_content_series} - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - blurred_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("blurred_bytes") - return blurred_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. - dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - res.cache() # to execute the udf - - if verbose: - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": blurred_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - return dst - - def image_resize( - self, - dsize: tuple[int, int] = (0, 0), - *, - engine: Literal[None, "opencv"] = None, - fx: float = 0.0, - fy: float = 0.0, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ): - """Resize images. - - Args: - dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size. - fy (float, defalut 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size. - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image resize operation fails. - """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - dsize_set = dsize[0] > 0 and dsize[1] > 0 - fsize_set = fx > 0.0 and fy > 0.0 - if not dsize_set ^ fsize_set: - raise ValueError( - "Only one of dsize or (fx, fy) parameters must be set. And the set values must be positive. " - ) - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - - if verbose: - resized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - resized_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[resized_content_b64_series] - ) - - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": resized_status_series, "content": resized_content_series} - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - resized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("resized_bytes") - return resized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. - dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - res.cache() # to execute the udf - - if verbose: - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": resized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - return dst - - def image_normalize( - self, - *, - engine: Literal[None, "opencv"] = None, - alpha: float = 1.0, - beta: float = 0.0, - norm_type: str = "l2", - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Normalize images. - - Args: - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization. - beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization. - norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax". - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image normalize operation fails. - """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error( - df, image_normalize_udf, "Image normalize" - ) - - if verbose: - normalized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[normalized_content_b64_series] - ) - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": normalized_status_series, "content": normalized_bytes} - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("normalized_bytes") - return normalized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. - dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_normalize_udf, "Image normalize") - res.cache() # to execute the udf - - if verbose: - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": normalized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - return dst - - def pdf_extract( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts text from PDF URLs and saves the text as string. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the extracted text from the PDF file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF extraction fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - pdf_extract_udf = blob_func.TransformFunction( - blob_func.pdf_extract_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_extract_udf, "PDF extraction") - - if verbose: - # Extract content with error handling - try: - content_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - except Exception as e: - raise RuntimeError( - f"Failed to extract content field from PDF result: {e}" - ) from e - try: - status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF result: {e}" - ) from e - - res_df = bpd.DataFrame({"status": status_series, "content": content_series}) - struct_series = bbq.struct(res_df).rename("extracted_results") - return struct_series - else: - return res.rename("extracted_content") - - def pdf_chunk( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - chunk_size: int = 2000, - overlap_size: int = 200, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts and chunks text from PDF URLs and saves the text as - arrays of strings. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - chunk_size (int, default 2000): the desired size of each text chunk - (number of characters). - overlap_size (int, default 200): the number of overlapping characters - between consective chunks. The helps to ensure context is - perserved across chunk boundaries. - max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframe.series.Series: array[str] or struct[str, array[str]], - depend on the "verbose" parameter. - where each string is a chunk of text extracted from PDF. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF chunking fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - if chunk_size <= 0: - raise ValueError("chunk_size must be a positive integer.") - if overlap_size < 0: - raise ValueError("overlap_size must be a non-negative integer.") - if overlap_size >= chunk_size: - raise ValueError("overlap_size must be smaller than chunk_size.") - - pdf_chunk_udf = blob_func.TransformFunction( - blob_func.pdf_chunk_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["chunk_size"] = chunk_size - df["overlap_size"] = overlap_size - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_chunk_udf, "PDF chunking") - - try: - content_series = bbq.json_extract_string_array(res, "$.content") - except Exception as e: - raise RuntimeError( - f"Failed to extract content array from PDF chunk result: {e}" - ) from e - - if verbose: - try: - status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF chunk result: {e}" - ) from e - - results_df = bpd.DataFrame( - {"status": status_series, "content": content_series} - ) - resultes_struct = bbq.struct(results_df).rename("chunked_results") - return resultes_struct - else: - return bbq.json_extract_string_array(res, "$").rename("chunked_content") - - def audio_transcribe( - self, - *, - engine: Literal["bigquery"] = "bigquery", - connection: Optional[str] = None, - model_name: Optional[ - Literal[ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ] - ] = None, - verbose: bool = False, - ) -> bigframes.series.Series: - """ - Transcribe audio content using a Gemini multimodal model. - - Args: - engine ('bigquery'): The engine (bigquery or third party library) used for the function. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - model_name (str): The model for natural language tasks. Accepted - values are "gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001". - See "https://ai.google.dev/gemini-api/docs/models" for model choices. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the transcribed content - are displayed. Conversely, when set to False, only the transcribed - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the transcribed text from the audio file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'bigquery'. - RuntimeError: If the transcription result structure is invalid. - """ - if engine.casefold() != "bigquery": - raise ValueError("Must specify the engine, supported value is 'bigquery'.") - - import bigframes.bigquery as bbq - import bigframes.pandas as bpd - - # col name doesn't matter here. Rename to avoid column name conflicts - audio_series = bigframes.series.Series(self._data._block) - - prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." - - # Convert the audio series to the runtime representation required by the model. - audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True) - - transcribed_results = bbq.ai.generate( - prompt=(prompt_text, audio_runtime), - connection_id=connection, - endpoint=model_name, - model_params={"generationConfig": {"temperature": 0.0}}, - ) - - # Validate that the result is not None - if transcribed_results is None: - raise RuntimeError("Transcription returned None result") - - transcribed_content_series = transcribed_results.struct.field("result").rename( - "transcribed_content" - ) - - if verbose: - transcribed_status_series = transcribed_results.struct.field("status") - results_df = bpd.DataFrame( - { - "status": transcribed_status_series, - "content": transcribed_content_series, - } - ) - results_struct = bbq.struct(results_df).rename("transcription_results") - return results_struct - else: - return transcribed_content_series.rename("transcribed_content") diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 26ff2616a1b7..7cc93d34c07a 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -305,6 +305,18 @@ def join(self, sep: str) -> T: ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep)) ) + def _to_blob(self, connection: Optional[str] = None) -> T: + import bigframes.core.blocks + + if hasattr(self._data, "_block") and isinstance( + self._data._block, bigframes.core.blocks.Block + ): + session = self._data._block.session + else: + raise ValueError("to_blob is only supported via Series.str") + connection = session._create_bq_connection(connection=connection) + return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + def to_blob(self, connection: Optional[str] = None) -> T: """Create a BigFrames Blob series from a series of URIs. @@ -325,16 +337,15 @@ def to_blob(self, connection: Optional[str] = None) -> T: bigframes.series.Series: Blob Series. """ - import bigframes.core.blocks + import warnings + import bigframes.exceptions as bfe - if hasattr(self._data, "_block") and isinstance( - self._data._block, bigframes.core.blocks.Block - ): - session = self._data._block.session - else: - raise ValueError("to_blob is only supported via Series.str") - connection = session._create_bq_connection(connection=connection) - return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + warnings.warn( + "Series.str.to_blob is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) + return self._to_blob(connection) def _parse_flags(flags: int) -> Optional[str]: diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index fbcc949855c2..3d29c19b9e41 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -321,16 +321,8 @@ def list(self) -> lists.ListAccessor: return lists.ListAccessor(self) @property - def blob(self) -> blob.BlobAccessor: - """ - Accessor for Blob operations. - """ - warnings.warn( - "The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - return blob.BlobAccessor(self) + def _blob(self) -> blob._BlobAccessor: + return blob._BlobAccessor(self) @property @validations.requires_ordering() @@ -383,7 +375,8 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -392,7 +385,8 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -401,7 +395,8 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename( self, @@ -462,7 +457,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -471,7 +467,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -480,7 +477,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... @validations.requires_index def rename_axis( @@ -524,7 +522,8 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: ... + ) -> bigframes.dataframe.DataFrame: + ... @overload def reset_index( @@ -535,7 +534,8 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: ... + ) -> Series: + ... @overload def reset_index( @@ -546,7 +546,8 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: ... + ) -> None: + ... @validations.requires_ordering() def reset_index( @@ -1539,9 +1540,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert batch_df.shape[1] == 1, ( - f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." - ) + assert ( + batch_df.shape[1] == 1 + ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." for item in batch_df.squeeze(axis=1).items(): yield item @@ -1771,7 +1772,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... @typing.overload def sort_values( @@ -1782,7 +1784,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: ... + ) -> Series: + ... def sort_values( self, @@ -1813,12 +1816,14 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: ... + ) -> Series: + ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: ... + ) -> None: + ... @validations.requires_index def sort_index( @@ -2693,28 +2698,18 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ - ex.DerefOp, - ex.DerefOp, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: + ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ... def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index a6bb3041764c..ea36cc1925f1 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,7 +432,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -448,7 +449,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -520,7 +522,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -529,7 +532,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -590,7 +594,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -606,7 +611,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -753,7 +759,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -767,7 +774,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -918,7 +926,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -926,7 +935,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -934,7 +944,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... def read_pandas( self, @@ -2248,12 +2259,17 @@ def from_glob_path( bigframes.pandas.DataFrame: Result BigFrames DataFrame. """ + warnings.warn( + "from_glob_path is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) - s = self._loader.read_gbq_table(table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() def _create_bq_connection( @@ -2312,7 +2328,7 @@ def read_gbq_object_table( table = self.bqclient.get_table(object_table) connection = table._properties["externalDataConfiguration"]["connectionId"] - s = self._loader.read_gbq_table(object_table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(object_table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() # ========================================================================= diff --git a/packages/bigframes/tests/system/large/blob/test_function.py b/packages/bigframes/tests/system/large/blob/test_function.py deleted file mode 100644 index bc09baf268d1..000000000000 --- a/packages/bigframes/tests/system/large/blob/test_function.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import traceback -import uuid -from typing import Generator - -import pandas as pd -import pytest -from google.cloud import storage - -import bigframes -import bigframes.pandas as bpd -from bigframes import dtypes - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -@pytest.fixture(scope="function") -def images_output_folder() -> Generator[str, None, None]: - id = uuid.uuid4().hex - folder = os.path.join("gs://bigframes_blob_test/output/", id) - yield folder - - # clean up - try: - cloud_storage_client = storage.Client() - bucket = cloud_storage_client.bucket("bigframes_blob_test") - blobs = bucket.list_blobs(prefix="output/" + id) - for blob in blobs: - blob.delete() - except Exception as exc: - traceback.print_exception(type(exc), exc, None) - - -@pytest.fixture(scope="function") -def images_output_uris(images_output_folder: str) -> list[str]: - return [ - os.path.join(images_output_folder, "img0.jpg"), - os.path.join(images_output_folder, "img1.jpg"), - ] - - -def test_blob_exif( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=False - ) - expected = bpd.Series( - ['{"ExifOffset": 47, "Make": "MyCamera"}'], - session=session, - dtype=dtypes.JSON_DTYPE, - ) - pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), - check_dtype=False, - check_index_type=False, - ) - - -def test_blob_exif_verbose( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=True - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.JSON_DTYPE - - -def test_blob_image_blur_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=False - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=True - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - # Content should be blob objects for GCS destination - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not content_series.blob.size().isna().any() - - -def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_pdf_extract( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=False, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - actual_text = actual[actual != ""].iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_extract_verbose( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=True, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = successful_results.apply(lambda x: x["content"]).iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=False, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # First entry is NA - actual_text = "".join(actual.dropna()) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=True, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = "".join(successful_results.apply(lambda x: x["content"]).iloc[0]) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=False, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. - logging.warning(f"blob_transcribe() model {model_name} verbose=False failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in transcribed text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe_verbose( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=True, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0]["content"] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. - logging.warning(f"blob_transcribe() model {model_name} verbose=True failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in transcribed text. " - ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 4bfd50fef4ec..7130c7ac1610 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -21,15 +21,9 @@ pytest.importorskip("pytest_snapshot") -def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.version().to_frame().sql - snapshot.assert_match(sql, "out.sql") - - def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.read_url().to_frame().sql + blob_s = scalar_types_df["string_col"].str._to_blob() + sql = blob_s._blob._read_url().to_frame().sql snapshot.assert_match(sql, "out.sql") @@ -45,7 +39,7 @@ def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapsh def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - blob_df = scalar_types_df["string_col"].str.to_blob() + blob_df = scalar_types_df["string_col"].str._to_blob() snapshot.assert_match(blob_df.to_frame().sql, "out.sql") From 1624846cc7ca387773f29ae3f0d4c923eeab573a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:00:30 +0000 Subject: [PATCH 2/6] refactor: cleanup blob API references --- packages/bigframes/docs/templates/toc.yml | 3 - ...with-bigframes-over-national-jukebox.ipynb | 701 +++++++++--------- .../tests/system/small/blob/test_io.py | 132 ---- .../system/small/blob/test_properties.py | 119 --- .../tests/system/small/blob/test_urls.py | 31 - 5 files changed, 364 insertions(+), 622 deletions(-) delete mode 100644 packages/bigframes/tests/system/small/blob/test_io.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_properties.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py diff --git a/packages/bigframes/docs/templates/toc.yml b/packages/bigframes/docs/templates/toc.yml index 5d043fd85f2a..562b857fee5c 100644 --- a/packages/bigframes/docs/templates/toc.yml +++ b/packages/bigframes/docs/templates/toc.yml @@ -87,9 +87,6 @@ uid: bigframes.operations.lists.ListAccessor - name: PlotAccessor uid: bigframes.operations.plotting.PlotAccessor - - name: BlobAccessor - uid: bigframes.operations.blob.BlobAccessor - status: beta name: Series - name: Window uid: bigframes.core.window.Window diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index fe2d567d1b31..e70ddfe4a845 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,23 +1,8 @@ { "cells": [ { + "id": "c62e292f", "cell_type": "markdown", - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" - } - } - } - }, - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -35,42 +20,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "216%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "194%" } } } }, + "editable": true, "slideshow": { - "slide_type": "slide" - } + "slide_type": "subslide" + }, + "tags": [] }, + "execution_count": null + }, + { + "id": "7dc312a4", + "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\"recording" - ] - }, - { - "cell_type": "markdown", + "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "0", - "zoom": "181%" + "zoom": "216%" } } } @@ -79,11 +64,16 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "07dcae4b", + "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\"audio\n", + "\u003cimg src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02HzzzeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoOW0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5ZVL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4llvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKamH4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8wmT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xKG4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" /\u003e\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -96,16 +86,14 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "275%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "z-index": "0", + "zoom": "181%" } } } @@ -114,20 +102,43 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "8dd2ddab", + "cell_type": "markdown", "source": [ "## Getting started with BigQuery DataFrames (bigframes)\n", "\n", "Install the bigframes package." - ] + ], + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "275%" + } + } + } + }, + "slideshow": { + "slide_type": "slide" + } + }, + "execution_count": null }, { + "id": "96cda443", "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "214%" } } @@ -142,18 +153,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] + "execution_count": null }, { + "id": "acf12472", "cell_type": "markdown", + "source": [ + "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "4", "zoom": "236%" } @@ -161,15 +175,17 @@ } } }, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] + "execution_count": null }, { + "id": "fd321077", "cell_type": "code", - "execution_count": null, + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -180,22 +196,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ] + "execution_count": null }, { + "id": "4d837a34", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "193%" } } @@ -210,24 +229,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - "bpd.options.bigquery.project = \"swast-scratch\"" - ] + "execution_count": null }, { + "id": "008f0a87", "cell_type": "markdown", + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "207%" } } @@ -237,20 +253,24 @@ "slide_type": "slide" } }, - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." - ] + "execution_count": null }, { + "id": "9a4b35ab", "cell_type": "code", - "execution_count": null, + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "225%" } } @@ -265,24 +285,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ] + "execution_count": null }, { + "id": "e00dcb01", "cell_type": "code", - "execution_count": null, + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "122%" } } @@ -300,20 +316,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ] + "execution_count": null }, { + "id": "335511be", "cell_type": "code", - "execution_count": null, + "source": [ + "df.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "134%" } } @@ -328,14 +343,17 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df.shape" - ] + "execution_count": null }, { + "id": "595126a1", "cell_type": "code", - "execution_count": null, + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -346,22 +364,36 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - "df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ] + "execution_count": null }, { + "id": "cbd59dd9", "cell_type": "code", - "execution_count": null, + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "161%" } } @@ -379,31 +411,14 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ] + "execution_count": null }, { + "id": "84548649", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -414,18 +429,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "flattened.shape" - ] + "execution_count": null }, { + "id": "8be3127f", "cell_type": "markdown", + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "216%" } } @@ -437,18 +453,25 @@ }, "tags": [] }, - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." - ] + "execution_count": null }, { + "id": "31277e21", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "# Note: str.to_blob is deprecated.\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "211%" } } @@ -468,23 +491,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ] + "execution_count": null }, { + "id": "d27756f5", "cell_type": "markdown", + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "317%" } } @@ -496,13 +515,20 @@ }, "tags": [] }, - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ] + "execution_count": null }, { + "id": "d1f7ad46", "cell_type": "code", - "execution_count": null, + "source": [ + "# Note: .blob.audio_transcribe is removed. This cell will fail.\n", + "# Use bigframes.bigquery.ai.generate instead.\n", + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.0-flash-001\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ], "metadata": { "editable": true, "execution": { @@ -518,22 +544,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.0-flash-001\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ] + "execution_count": null }, { + "id": "1575c468", "cell_type": "markdown", + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "229%" } } @@ -543,18 +566,21 @@ "slide_type": "slide" } }, - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ] + "execution_count": null }, { + "id": "e53c7a0b", "cell_type": "code", - "execution_count": null, + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "177%" } } @@ -574,21 +600,20 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ] + "execution_count": null }, { + "id": "3629f4af", "cell_type": "code", - "execution_count": null, + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "141%" } } @@ -603,20 +628,23 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ] + "execution_count": null }, { + "id": "09ef6c3d", "cell_type": "code", - "execution_count": null, + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -634,23 +662,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ] + "execution_count": null }, { + "id": "cf15986a", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -670,25 +701,23 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "778d0ac3", "cell_type": "markdown", + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "181%" } } @@ -698,22 +727,21 @@ "slide_type": "slide" } }, - "source": [ - "## Creating a searchable index\n", - "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", - "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ] + "execution_count": null }, { + "id": "de7e4e11", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "163%" } } @@ -728,21 +756,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ] + "execution_count": null }, { + "id": "4acfb495", "cell_type": "code", - "execution_count": null, + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "125%" } } @@ -757,25 +789,22 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ] + "execution_count": null }, { + "id": "a49d1dde", "cell_type": "code", - "execution_count": null, + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "178%" } } @@ -795,39 +824,39 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ] + "execution_count": null }, { + "id": "15a5bfd3", "cell_type": "markdown", + "source": [ + "We're now ready to save this to a table." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "224%" } } } } }, - "source": [ - "We're now ready to save this to a table." - ] + "execution_count": null }, { + "id": "8b49384c", "cell_type": "code", - "execution_count": null, + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "172%" } } @@ -842,19 +871,24 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ] + "execution_count": null }, { + "id": "810c77d5", "cell_type": "markdown", + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "183%" } } @@ -864,23 +898,22 @@ "slide_type": "slide" } }, - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ] + "execution_count": null }, { + "id": "fb63ad94", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "92%" } } @@ -898,22 +931,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ] + "execution_count": null }, { + "id": "f19c88d3", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "127%" } } @@ -928,26 +965,28 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ] + "execution_count": null }, { + "id": "06f0312e", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "175%" } } @@ -967,23 +1006,14 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ] + "execution_count": null }, { + "id": "fae3fcae", "cell_type": "code", - "execution_count": null, + "source": [ + "vector_search_results.dtypes" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -994,19 +1024,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "vector_search_results.dtypes" - ] + "execution_count": null }, { + "id": "38423dde", "cell_type": "code", - "execution_count": null, + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "158%" } } @@ -1024,20 +1055,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ] + "execution_count": null }, { + "id": "37a1dfbd", "cell_type": "code", - "execution_count": null, + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "138%" } } @@ -1052,14 +1082,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ] + "execution_count": null }, { + "id": "a4748e0f", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "editable": true, "execution": { @@ -1076,26 +1113,16 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "ff22e7eb", "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "trusted": true }, - "outputs": [], - "source": [] + "execution_count": null } ], "metadata": { @@ -1132,6 +1159,6 @@ "version": "3.11.13" } }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 4, + "nbformat": 4 } diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py deleted file mode 100644 index c89fb4c6e6ed..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_io.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from unittest import mock - -import pandas as pd -import pytest - -import bigframes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -idisplay = pytest.importorskip("IPython.display") - - -def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris -): - uri_series = bpd.Series(images_uris, session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - - pd_blob_df = blob_series.struct.explode().to_pandas() - expected_pd_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - blob_df = session.from_glob_path( - images_gcs_path, connection=bq_connection, name="blob_col" - ) - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - obj_table = session._create_object_table(images_gcs_path, bq_connection) - - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - images_mm_df["blob_col"].blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert isinstance(arg, idisplay.Image) - - -def test_display_nulls( - monkeypatch, - bq_connection: str, - session: bigframes.Session, -): - uri_series = bpd.Series([None, None, None], dtype="string", session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - blob_series.blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py deleted file mode 100644 index f63de38a8ce9..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_properties.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pandas as pd -import pytest - -import bigframes.dtypes as dtypes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.uri().to_pandas() - expected = pd.Series(images_uris, name="uri") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.authorizer().to_pandas() - expected = pd.Series( - [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_version(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.version().to_pandas() - expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_metadata(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.metadata().to_pandas() - expected = pd.Series( - [ - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' - '"size":338390,' - '"updated":1753907851000000}' - ), - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' - '"size":43333,' - '"updated":1753907851000000}' - ), - ], - name="metadata", - dtype=dtypes.JSON_DTYPE, - ) - expected.index = expected.index.astype(dtypes.INT_DTYPE) - pd.testing.assert_series_equal(actual, expected) - - -def test_blob_content_type(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.content_type().to_pandas() - expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_md5_hash(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas() - expected = pd.Series( - ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], - name="md5_hash", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_size(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.size().to_pandas() - expected = pd.Series([338390, 43333], name="size") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_updated(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.updated().to_pandas() - expected = pd.Series( - [ - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - ], - name="updated", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py deleted file mode 100644 index b2dd6604343e..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_urls.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_read_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.read_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() - - -def test_blob_write_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.write_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() From b9476e683f180374e85d805ab9b2be641e7f6380 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:06:16 +0000 Subject: [PATCH 3/6] style: fix formatting and clean up imports --- packages/bigframes/bigframes/operations/blob.py | 8 -------- packages/bigframes/bigframes/session/polars_executor.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index d29d1a1202c0..9cd7dd0db291 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -14,18 +14,10 @@ from __future__ import annotations -import os -import warnings -from typing import Literal, Optional, Union, cast - -import pandas as pd -import requests import bigframes.dataframe -import bigframes.exceptions as bfe import bigframes.operations as ops import bigframes.series -from bigframes import clients, dtypes from bigframes.core.logging import log_adapter FILE_FOLDER_REGEX = r"^.*\/(.*)$" diff --git a/packages/bigframes/bigframes/session/polars_executor.py b/packages/bigframes/bigframes/session/polars_executor.py index 43e3609ac3c1..06c7fcb925c4 100644 --- a/packages/bigframes/bigframes/session/polars_executor.py +++ b/packages/bigframes/bigframes/session/polars_executor.py @@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode): return False for expr in node._node_expressions: if isinstance(expr, agg_expressions.Aggregation): - if not type(expr.op) in _COMPATIBLE_AGG_OPS: + if type(expr.op) not in _COMPATIBLE_AGG_OPS: return False if isinstance(expr, expression.Expression): if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS): From d84ec947309ea54eec3ddc74d2678e2262e7e4af Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:11:51 +0000 Subject: [PATCH 4/6] style: enforce ruff formatting --- packages/bigframes/.python-version | 1 + packages/bigframes/bigframes/dataframe.py | 51 +++++---------- packages/bigframes/bigframes/series.py | 65 +++++++++---------- .../bigframes/bigframes/session/__init__.py | 33 ++++------ 4 files changed, 60 insertions(+), 90 deletions(-) create mode 100644 packages/bigframes/.python-version diff --git a/packages/bigframes/.python-version b/packages/bigframes/.python-version new file mode 100644 index 000000000000..95ed564f82b7 --- /dev/null +++ b/packages/bigframes/.python-version @@ -0,0 +1 @@ +3.14.2 diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index a98a44448737..b0ea81e003e1 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -1611,8 +1611,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1624,8 +1623,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def to_pandas( self, @@ -1937,8 +1935,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def drop( @@ -1950,8 +1947,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def drop( self, @@ -2095,20 +2091,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2125,8 +2118,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2135,8 +2127,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2145,8 +2136,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2342,8 +2332,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -2355,8 +2344,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: - ... + ) -> None: ... @overload def reset_index( @@ -2368,8 +2356,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: - ... + ) -> Optional[DataFrame]: ... def reset_index( self, @@ -2432,8 +2419,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2442,8 +2428,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_index( self, @@ -2489,8 +2474,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2501,8 +2485,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 3d29c19b9e41..17addef1ab0a 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -375,8 +375,7 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -385,8 +384,7 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -395,8 +393,7 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename( self, @@ -457,8 +454,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -467,8 +463,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -477,8 +472,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... @validations.requires_index def rename_axis( @@ -522,8 +516,7 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: - ... + ) -> bigframes.dataframe.DataFrame: ... @overload def reset_index( @@ -534,8 +527,7 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: - ... + ) -> Series: ... @overload def reset_index( @@ -546,8 +538,7 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: - ... + ) -> None: ... @validations.requires_ordering() def reset_index( @@ -1540,9 +1531,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert ( - batch_df.shape[1] == 1 - ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + assert batch_df.shape[1] == 1, ( + f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + ) for item in batch_df.squeeze(axis=1).items(): yield item @@ -1772,8 +1763,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... @typing.overload def sort_values( @@ -1784,8 +1774,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: - ... + ) -> Series: ... def sort_values( self, @@ -1816,14 +1805,12 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: - ... + ) -> Series: ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: - ... + ) -> None: ... @validations.requires_index def sort_index( @@ -2698,18 +2685,28 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + ex.DerefOp, + blocks.Block, + ]: ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: ... def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index ea36cc1925f1..a025256f2b1e 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,8 +432,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -449,8 +448,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq( self, @@ -522,8 +520,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -532,8 +529,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -594,8 +590,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -611,8 +606,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -759,8 +753,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -774,8 +767,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -926,8 +918,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -935,8 +926,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... @typing.overload def read_pandas( @@ -944,8 +934,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self, From 9c3bc9e3ff851917dd6aba3fb71a18aae6143a52 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:43:04 +0000 Subject: [PATCH 5/6] docs: fix notebook outputs --- ...with-bigframes-over-national-jukebox.ipynb | 153 ++++++++++-------- 1 file changed, 90 insertions(+), 63 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index e70ddfe4a845..3fd66abcbb44 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -25,7 +25,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "194%" } } @@ -47,13 +47,13 @@ "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + "\"recording" ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "216%" } @@ -73,7 +73,7 @@ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\u003cimg src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02HzzzeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoOW0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5ZVL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4llvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKamH4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8wmT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xKG4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" /\u003e\n", + "\"audio\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -91,7 +91,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "181%" } @@ -116,7 +116,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "275%" } } @@ -138,7 +138,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "214%" } } @@ -153,21 +153,22 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "acf12472", "cell_type": "markdown", "source": [ - "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "4", "zoom": "236%" } @@ -196,7 +197,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4d837a34", @@ -214,7 +216,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "193%" } } @@ -229,7 +231,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "008f0a87", @@ -243,7 +246,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "207%" } } @@ -270,7 +273,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "225%" } } @@ -285,7 +288,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "e00dcb01", @@ -298,7 +302,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "122%" } } @@ -316,7 +320,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "335511be", @@ -328,7 +333,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "134%" } } @@ -343,7 +348,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "595126a1", @@ -364,7 +370,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cbd59dd9", @@ -393,7 +400,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "161%" } } @@ -411,7 +418,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "84548649", @@ -429,7 +437,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "8be3127f", @@ -441,7 +450,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "216%" } } @@ -471,7 +480,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "211%" } } @@ -491,7 +500,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "d27756f5", @@ -503,7 +513,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "317%" } } @@ -544,7 +554,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "1575c468", @@ -556,7 +567,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "229%" } } @@ -580,7 +591,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "177%" } } @@ -600,7 +611,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "3629f4af", @@ -613,7 +625,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "141%" } } @@ -628,7 +640,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "09ef6c3d", @@ -644,7 +657,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -662,7 +675,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cf15986a", @@ -681,7 +695,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -701,7 +715,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "778d0ac3", @@ -717,7 +732,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "181%" } } @@ -741,7 +756,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "163%" } } @@ -756,7 +771,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4acfb495", @@ -774,7 +790,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "125%" } } @@ -789,7 +805,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a49d1dde", @@ -804,7 +821,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "178%" } } @@ -824,7 +841,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "15a5bfd3", @@ -836,7 +854,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "224%" } } @@ -856,7 +874,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "172%" } } @@ -871,7 +889,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "810c77d5", @@ -888,7 +907,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "183%" } } @@ -913,7 +932,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "92%" } } @@ -931,7 +950,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f19c88d3", @@ -950,7 +970,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "127%" } } @@ -965,7 +985,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "06f0312e", @@ -986,7 +1007,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "175%" } } @@ -1006,7 +1027,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fae3fcae", @@ -1024,7 +1046,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "38423dde", @@ -1037,7 +1060,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "158%" } } @@ -1055,7 +1078,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "37a1dfbd", @@ -1067,7 +1091,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "138%" } } @@ -1082,7 +1106,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a4748e0f", @@ -1113,7 +1138,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "ff22e7eb", @@ -1122,7 +1148,8 @@ "metadata": { "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] } ], "metadata": { From 11f0b0b5307c26a2da127b33ac7cc486f25d57d2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 16:43:50 -0700 Subject: [PATCH 6/6] Update packages/bigframes/bigframes/operations/strings.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/bigframes/bigframes/operations/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 7cc93d34c07a..a5b9944424b0 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -313,7 +313,7 @@ def _to_blob(self, connection: Optional[str] = None) -> T: ): session = self._data._block.session else: - raise ValueError("to_blob is only supported via Series.str") + raise ValueError(f"{self._to_blob.__name__} is only supported via Series.str") connection = session._create_bq_connection(connection=connection) return self._data._apply_binary_op(connection, ops.obj_make_ref_op)