1+ import multiprocessing
2+ import os
3+ import logging
4+ from pathlib import Path
5+ import concurrent .futures
6+ from typing import List , TYPE_CHECKING , Union , cast , Optional
7+
8+ from humanloop .types import FileType , PromptResponse , AgentResponse , ToolResponse , DatasetResponse , EvaluatorResponse , FlowResponse
9+ from humanloop .core .api_error import ApiError
10+
11+ if TYPE_CHECKING :
12+ from humanloop .base_client import BaseHumanloop
13+
# Module-level logger: plain messages (no timestamps/levels) on the console.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(message)s")
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
# Attach the handler only if none is configured yet, so re-importing this
# module (or an application-configured logger) doesn't duplicate output.
if not logger.hasHandlers():
    logger.addHandler(console_handler)
22+
class SyncClient:
    """Client for managing synchronization between local filesystem and Humanloop."""

    def __init__(
        self,
        client: "BaseHumanloop",
        base_dir: str = "humanloop",
        max_workers: Optional[int] = None,
    ):
        """
        Parameters
        ----------
        client: Humanloop client instance
        base_dir: Base directory for synced files (default: "humanloop")
        max_workers: Maximum number of worker threads (default: CPU count * 2)
        """
        self.client = client
        self.base_dir = Path(base_dir)
        # Pulling is I/O-bound (HTTP + disk writes), so oversubscribe threads
        # relative to the core count.
        self.max_workers = max_workers or multiprocessing.cpu_count() * 2

    def _save_serialized_file(self, serialized_content: str, file_path: str, file_type: FileType) -> None:
        """Save serialized file to local filesystem.

        Args:
            serialized_content: The content to save
            file_path: The path (relative to ``base_dir``) where to save the file
            file_type: The type of file (prompt or agent), used as the extension

        Raises:
            OSError: If the directory or file cannot be created or written.
        """
        try:
            # Create full path including base_dir prefix
            full_path = self.base_dir / file_path
            # Create directory if it doesn't exist
            full_path.parent.mkdir(parents=True, exist_ok=True)

            # Replace any existing extension with the file-type extension,
            # e.g. "team/welcome" -> "team/welcome.prompt".
            target_path = full_path.parent / f"{full_path.stem}.{file_type}"

            # Fix: write UTF-8 explicitly rather than the platform default
            # encoding, so non-ASCII serialized content round-trips everywhere.
            target_path.write_text(serialized_content, encoding="utf-8")
            logger.info("Syncing %s %s", file_type, file_path)
        except Exception as e:
            logger.error("Failed to sync %s %s: %s", file_type, file_path, e)
            raise

    def _process_file(
        self,
        file: Union[PromptResponse, AgentResponse, ToolResponse, DatasetResponse, EvaluatorResponse, FlowResponse],
    ) -> None:
        """Process a single file by serializing and saving it.

        Only "prompt" and "agent" files are handled; other types are skipped
        with a warning instead of raising.

        Args:
            file: The file to process (must be a PromptResponse or AgentResponse)

        Raises:
            Exception: Re-raised from serialization/saving failures so the
                caller can record this file as failed.
        """
        try:
            # Skip if not a prompt or agent
            if file.type not in ("prompt", "agent"):
                logger.warning("Skipping unsupported file type: %s", file.type)
                return

            # Narrow the union for the type checker; casts have no runtime effect.
            if file.type == "prompt":
                file = cast(PromptResponse, file)
            else:
                file = cast(AgentResponse, file)

            # Serialize the file based on its type. The guard above already
            # restricted the type, so a two-way dispatch is sufficient.
            try:
                if file.type == "prompt":
                    serialized = self.client.prompts.serialize(id=file.id)
                else:
                    serialized = self.client.agents.serialize(id=file.id)
            except ApiError as e:
                # The SDK returns the YAML content in the error body when it
                # can't parse the response as JSON, so a "200 error" is success.
                if e.status_code == 200:
                    serialized = e.body
                else:
                    raise
            except Exception as e:
                logger.error("Failed to serialize %s %s: %s", file.type, file.id, e)
                raise

            # Save to local filesystem
            self._save_serialized_file(serialized, file.path, file.type)

        except Exception as e:
            logger.error("Error processing file %s: %s", file.path, e)
            raise

    def pull(self) -> List[str]:
        """Sync prompt and agent files from Humanloop to local filesystem.

        Pages through all prompt/agent files in the workspace and processes
        them concurrently on a thread pool. A failure on one file does not
        stop the others.

        Returns:
            List of successfully processed file paths
        """
        successful_files: List[str] = []
        failed_files: List[str] = []

        # Thread pool: each file is serialized+saved independently.
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = []
            page = 1

            # Fetch pages sequentially, fanning each page's records out to the pool.
            while True:
                try:
                    response = self.client.files.list_files(type=["prompt", "agent"], page=page)
                    if not response.records:
                        break

                    # Submit each file for processing
                    for file in response.records:
                        futures.append((file.path, executor.submit(self._process_file, file)))

                    page += 1
                except Exception as e:
                    # Stop paging on error but still drain already-submitted work.
                    logger.error("Failed to fetch page %s: %s", page, e)
                    break

            # Wait for all tasks to complete and partition paths by outcome.
            for file_path, future in futures:
                try:
                    future.result()
                    successful_files.append(file_path)
                except Exception as e:
                    failed_files.append(file_path)
                    logger.error("Task failed for %s: %s", file_path, e)

        # Log summary
        if successful_files:
            logger.info("\nSynced %d files", len(successful_files))
        if failed_files:
            logger.error("Failed to sync %d files", len(failed_files))

        return successful_files