Commit 8e82838

EItanyaGWeale authored and committed
fix: Refactor Anthropic integration to support both direct API and Vertex AI
This change introduces an `AnthropicLlm` base class for direct Anthropic API calls using `AsyncAnthropic`. The existing `Claude` class now inherits from `AnthropicLlm` and is specialized to use `AsyncAnthropicVertex` for models hosted on Vertex AI. The `messages.create` call is now properly awaited.

Merge: google#2904
Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 838851026
1 parent 7edd7ea commit 8e82838
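
For orientation, here is a minimal usage sketch (not part of the diff) of how the two classes relate after this change. The import path and default model strings come from the diff and tests below; the `ANTHROPIC_API_KEY` pickup is standard Anthropic SDK behavior, not something this commit changes.

    from google.adk.models.anthropic_llm import AnthropicLlm, Claude

    # Direct Anthropic API: AsyncAnthropic() reads ANTHROPIC_API_KEY from the
    # environment (standard Anthropic SDK behavior).
    direct_llm = AnthropicLlm(model="claude-sonnet-4-20250514")

    # Vertex AI-hosted Claude: the client raises ValueError unless
    # GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION are set (see diff below).
    vertex_llm = Claude(model="claude-3-5-sonnet-v2@20241022")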

2 files changed

Lines changed: 57 additions & 8 deletions

src/google/adk/models/anthropic_llm.py

Lines changed: 25 additions & 8 deletions
@@ -28,7 +28,8 @@
 from typing import TYPE_CHECKING
 from typing import Union
 
-from anthropic import AnthropicVertex
+from anthropic import AsyncAnthropic
+from anthropic import AsyncAnthropicVertex
 from anthropic import NOT_GIVEN
 from anthropic import types as anthropic_types
 from google.genai import types
@@ -41,7 +42,7 @@
 if TYPE_CHECKING:
   from .llm_request import LlmRequest
 
-__all__ = ["Claude"]
+__all__ = ["AnthropicLlm", "Claude"]
 
 logger = logging.getLogger("google_adk." + __name__)
 
@@ -264,15 +265,15 @@ def function_declaration_to_tool_param(
   )
 
 
-class Claude(BaseLlm):
-  """Integration with Claude models served from Vertex AI.
+class AnthropicLlm(BaseLlm):
+  """Integration with Claude models via the Anthropic API.
 
   Attributes:
     model: The name of the Claude model.
     max_tokens: The maximum number of tokens to generate.
   """
 
-  model: str = "claude-3-5-sonnet-v2@20241022"
+  model: str = "claude-sonnet-4-20250514"
   max_tokens: int = 8192
 
   @classmethod
@@ -304,7 +305,7 @@ async def generate_content_async(
         else NOT_GIVEN
     )
     # TODO(b/421255973): Enable streaming for anthropic models.
-    message = self._anthropic_client.messages.create(
+    message = await self._anthropic_client.messages.create(
         model=llm_request.model,
         system=llm_request.config.system_instruction,
         messages=messages,
@@ -315,7 +316,23 @@
     yield message_to_generate_content_response(message)
 
   @cached_property
-  def _anthropic_client(self) -> AnthropicVertex:
+  def _anthropic_client(self) -> AsyncAnthropic:
+    return AsyncAnthropic()
+
+
+class Claude(AnthropicLlm):
+  """Integration with Claude models served from Vertex AI.
+
+  Attributes:
+    model: The name of the Claude model.
+    max_tokens: The maximum number of tokens to generate.
+  """
+
+  model: str = "claude-3-5-sonnet-v2@20241022"
+
+  @cached_property
+  @override
+  def _anthropic_client(self) -> AsyncAnthropicVertex:
     if (
         "GOOGLE_CLOUD_PROJECT" not in os.environ
         or "GOOGLE_CLOUD_LOCATION" not in os.environ
@@ -325,7 +342,7 @@ def _anthropic_client(self) -> AnthropicVertex:
           " Anthropic on Vertex."
       )
 
-    return AnthropicVertex(
+    return AsyncAnthropicVertex(
         project_id=os.environ["GOOGLE_CLOUD_PROJECT"],
         region=os.environ["GOOGLE_CLOUD_LOCATION"],
     )
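
The `await` added in `generate_content_async` is load-bearing: with an async client, calling `messages.create(...)` without `await` returns a coroutine object rather than a `Message`, so `message_to_generate_content_response` would have received the wrong type. A self-contained illustration of the failure mode (toy names, not ADK code):

    import asyncio

    async def create():  # stands in for AsyncAnthropic().messages.create
      return "message"

    async def main():
      not_awaited = create()    # a coroutine object, not the result
      print(type(not_awaited))  # <class 'coroutine'>
      print(await not_awaited)  # "message" -- the actual result

    asyncio.run(main())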

tests/unittests/models/test_anthropic_llm.py

Lines changed: 32 additions & 0 deletions
@@ -19,6 +19,7 @@
 from anthropic import types as anthropic_types
 from google.adk import version as adk_version
 from google.adk.models import anthropic_llm
+from google.adk.models.anthropic_llm import AnthropicLlm
 from google.adk.models.anthropic_llm import Claude
 from google.adk.models.anthropic_llm import content_to_message_param
 from google.adk.models.anthropic_llm import function_declaration_to_tool_param
@@ -359,6 +360,37 @@ async def mock_coro():
   assert responses[0].content.parts[0].text == "Hello, how can I help you?"
 
 
+@pytest.mark.asyncio
+async def test_anthropic_llm_generate_content_async(
+    llm_request, generate_content_response, generate_llm_response
+):
+  anthropic_llm_instance = AnthropicLlm(model="claude-sonnet-4-20250514")
+  with mock.patch.object(
+      anthropic_llm_instance, "_anthropic_client"
+  ) as mock_client:
+    with mock.patch.object(
+        anthropic_llm,
+        "message_to_generate_content_response",
+        return_value=generate_llm_response,
+    ):
+      # Create a mock coroutine that returns the generate_content_response.
+      async def mock_coro():
+        return generate_content_response
+
+      # Assign the coroutine to the mocked method
+      mock_client.messages.create.return_value = mock_coro()
+
+      responses = [
+          resp
+          async for resp in anthropic_llm_instance.generate_content_async(
+              llm_request, stream=False
+          )
+      ]
+      assert len(responses) == 1
+      assert isinstance(responses[0], LlmResponse)
+      assert responses[0].content.parts[0].text == "Hello, how can I help you?"
+
+
 @pytest.mark.asyncio
 async def test_generate_content_async_with_max_tokens(
     llm_request, generate_content_response, generate_llm_response