Commit eb744fa (cloud-browser)
Add Cloud Browser support to the SDK. BrowserConfig holds the full parameter surface (proxy_pool, os, country, session, auto_close, timeout, debug, extensions, block_*, screenshot, resolution, target_url, cache, blacklist, unblock, unblock_timeout, browser_brand, byop_proxy) and ScrapflyClient.cloud_browser(config) builds the WebSocket URL ready for Playwright connect_over_cdp(). byop_proxy lets Custom plan users route browser traffic through their own proxy. Supported schemes: http, https, socks5, socks5h, socks5+udp, socks5h+udp. The +udp variants enable HTTP/3 (QUIC) via SOCKS5 UDP ASSOCIATE for providers that implement RFC 1928 §7. See https://scrapfly.io/docs/cloud-browser-api/getting-started
1 parent 754e84e commit eb744fa
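A hedged sketch of the surface described above, combining the parameters named in the commit message with the usage shown in the examples below (the byop_proxy endpoint and credentials are hypothetical; the pool/OS string values follow this diff's examples):

from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='YOUR_API_KEY')

# byop_proxy routes browser traffic through your own proxy (Custom plan).
# The socks5h+udp scheme additionally enables HTTP/3 (QUIC) via SOCKS5
# UDP ASSOCIATE when the proxy implements RFC 1928 §7.
config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
    country='us',
    byop_proxy='socks5h+udp://user:pass@proxy.example.com:1080',  # hypothetical endpoint
)

cdp_url = scrapfly.cloud_browser(config)  # WebSocket URL ready for connect_over_cdp()
with sync_playwright() as p:
    browser = p.chromium.connect_over_cdp(cdp_url)
    browser.close()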

12 files changed

Lines changed: 841 additions & 2 deletions
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
"""
Basic Browser Use connection test with Scrapfly Cloud Browser.

Browser Use connects to Cloud Browser via CDP. The first connection may trigger
a reconnection (this is normal - the Cloud Browser allocates the browser instance
during the initial WebSocket handshake).

Requires: Python 3.11+, browser-use, scrapfly-sdk
"""
import asyncio
from scrapfly import ScrapflyClient, BrowserConfig
from browser_use import Browser, BrowserProfile

scrapfly = ScrapflyClient(
    key='YOUR_API_KEY',
    cloud_browser_host='wss://browser.scrapfly.io',
)

config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)

cdp_url = scrapfly.cloud_browser(config)
print(f"CDP URL: {cdp_url[:80]}...")


async def test_connection():
    browser = Browser(
        browser_profile=BrowserProfile(
            cdp_url=cdp_url,
        )
    )

    # Start the browser session (may reconnect once during allocation)
    await browser.start()
    print("Connected to Cloud Browser")

    # Get a page and navigate
    page = await browser.get_current_page()
    await page.goto('https://web-scraping.dev/products')

    title = await page.title()
    url = page.url
    print(f"Page title: {title}")
    print(f"Page URL: {url}")

    await browser.close()
    print("Browser closed successfully")


asyncio.run(test_connection())
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
#!/bin/bash
# Browser Use CLI with Scrapfly Cloud Browser
#
# The CLI connects to Cloud Browser via CDP and provides interactive
# browser control from the terminal.
#
# Requires: browser-use CLI installed (pip install browser-use)

API_KEY="YOUR_API_KEY"
BROWSER_WS="wss://browser.scrapfly.io?api_key=${API_KEY}&proxy_pool=datacenter&os=linux"

# Open a page in the cloud browser
browser-use --cdp-url "$BROWSER_WS" open https://web-scraping.dev/products

# Get page state (title, URL, clickable elements)
browser-use state

# Click on a product link (by element index from state output)
browser-use click 5

# Take a screenshot
browser-use screenshot product.png

# Type into a search field
browser-use input 3 "web scraping"

# Press Enter
browser-use keys "Enter"

# Close the session (stops billing)
browser-use close
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
"""
Connect Browser Use AI agent to Scrapfly Cloud Browser.

Browser Use controls remote browsers over CDP.
Note: The initial connection may trigger a WebSocket reconnection - this is normal
and handled automatically by browser-use's reconnection logic.

Requirements:
- Python 3.11+
- pip install browser-use scrapfly-sdk langchain-openai
- OPENAI_API_KEY environment variable set
"""
import asyncio
from scrapfly import ScrapflyClient, BrowserConfig
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser, BrowserProfile

scrapfly = ScrapflyClient(
    key='YOUR_API_KEY',
)

# Generate the Cloud Browser CDP endpoint
config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)
cdp_url = scrapfly.cloud_browser(config)


async def run_agent():
    # Connect to Cloud Browser via CDP
    browser = Browser(
        browser_profile=BrowserProfile(
            cdp_url=cdp_url,
        )
    )

    # Create AI agent with natural language task
    agent = Agent(
        task=(
            "Go to https://web-scraping.dev/products and extract all product names and prices. "
            "Return the data as a JSON list."
        ),
        llm=ChatOpenAI(model="gpt-4o"),
        browser=browser,
    )

    result = await agent.run()
    print("Agent result:", result)


asyncio.run(run_agent())
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
"""Connect to Scrapfly Cloud Browser using Playwright (Python)"""
from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='__API_KEY__')

# Configure Cloud Browser connection
browser_config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)

# Get the CDP WebSocket URL
cdp_url = scrapfly.cloud_browser(browser_config)


def run():
    with sync_playwright() as p:
        browser = None
        try:
            # Connect to Cloud Browser
            browser = p.chromium.connect_over_cdp(cdp_url)

            context = browser.contexts[0]
            page = context.pages[0] if context.pages else context.new_page()

            # Navigate and interact
            page.goto('https://web-scraping.dev')
            print('Page title:', page.title())

            # Take a screenshot
            page.screenshot(path='screenshot.png')
        finally:
            if browser:
                browser.close()


run()
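The Browser Use examples in this commit note that the first connection can trigger a reconnection while the browser instance is allocated during the WebSocket handshake. Raw Playwright does not retry on its own, so a minimal retry wrapper (a sketch under that assumption, not part of the SDK) could smooth over a dropped first handshake:

import time
from playwright.sync_api import sync_playwright, Error as PlaywrightError


def connect_with_retry(p, cdp_url, attempts=3, delay=2.0):
    # Retry connect_over_cdp in case the initial handshake drops
    # while the Cloud Browser instance is being allocated.
    last_error = None
    for _ in range(attempts):
        try:
            return p.chromium.connect_over_cdp(cdp_url)
        except PlaywrightError as exc:
            last_error = exc
            time.sleep(delay)
    raise last_error


# Usage:
#   with sync_playwright() as p:
#       browser = connect_with_retry(p, cdp_url)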
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
"""
Connect to Scrapfly Cloud Browser for Selenium users.

Selenium does not natively support remote CDP WebSocket connections.
This example uses the /json/version discovery endpoint + Playwright as the CDP transport.

For direct Playwright usage (recommended), see playwright_connect.py
"""
import requests
from playwright.sync_api import sync_playwright

API_KEY = 'YOUR_API_KEY'

# Discover the WebSocket URL via the standard Chrome DevTools HTTP endpoint
version_info = requests.get(
    'https://browser.scrapfly.io/json/version',
    params={
        'key': API_KEY,
        'proxy_pool': 'datacenter',
        'os': 'linux',
        'country': 'us',
    },
).json()

ws_url = version_info['webSocketDebuggerUrl']
print(f"Browser: {version_info['Browser']}")
print(f"WebSocket URL: {ws_url[:80]}...")

# Connect via Playwright CDP
with sync_playwright() as p:
    browser = p.chromium.connect_over_cdp(ws_url)
    context = browser.contexts[0]
    page = context.pages[0] if context.pages else context.new_page()

    page.goto('https://web-scraping.dev/products')
    print(f"Page title: {page.title()}")

    # Extract products (Selenium-style)
    products = page.locator('.product-thumb').all()
    for product in products[:3]:
        title = product.locator('h3').inner_text()
        print(f"  Product: {title}")

    page.screenshot(path='screenshot.png')
    print("Screenshot saved")

    browser.close()

examples/browser/session_resume.py

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
"""Session Resume: reconnect to an existing Cloud Browser session"""
import time
from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='__API_KEY__')

SESSION_ID = 'my-persistent-session'

# Configure with session + auto_close=False for persistence
browser_config = BrowserConfig(
    proxy_pool='datacenter',
    session=SESSION_ID,
    auto_close=False,
)

cdp_url = scrapfly.cloud_browser(browser_config)


def first_connection():
    """First connection: navigate and set cookies"""
    print('=== First Connection ===')
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(cdp_url)
        context = browser.contexts[0]
        page = context.new_page()
        page.goto('https://web-scraping.dev')

        # Set a cookie
        context.add_cookies([{
            'name': 'session_token',
            'value': 'abc123',
            'domain': 'web-scraping.dev',
            'path': '/'
        }])

        print('Cookies set, disconnecting...')
        browser.close()  # Disconnects CDP - browser stays alive (auto_close=False)


def second_connection():
    """Second connection: cookies are still there"""
    print('=== Second Connection (Resume) ===')
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(cdp_url)
        context = browser.contexts[0]
        page = context.pages[0] if context.pages else context.new_page()

        # Cookies persist from the previous connection
        cookies = context.cookies('https://web-scraping.dev')
        print('Cookies from previous session:', cookies)

        browser.close()  # Disconnects CDP


first_connection()
time.sleep(2)  # Wait a bit, then reconnect
second_connection()

# Terminate the session when fully done
scrapfly.cloud_browser_session_stop(SESSION_ID)
print(f'Session {SESSION_ID} terminated')
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
"""
Stagehand Cloud Browser Connection

Stagehand is a JavaScript/TypeScript-only library (@browserbase/stagehand)
and cannot be used directly from Python.

You can generate the CDP WebSocket URL from Python and use it in your
JavaScript Stagehand code:

    from scrapfly import ScrapflyClient, BrowserConfig

    scrapfly = ScrapflyClient(key='__API_KEY__')
    cdp_url = scrapfly.cloud_browser(BrowserConfig(proxy_pool='datacenter'))
    print(f"Use this CDP URL in your Stagehand JS code: {cdp_url}")

JavaScript Stagehand example:

    import { Stagehand } from "@browserbase/stagehand";

    const stagehand = new Stagehand({
        env: "BROWSERBASE",
        browserbaseConnectURL: "wss://browser.scrapfly.io?api_key=YOUR_KEY&proxy_pool=datacenter",
    });

    await stagehand.init();
    await stagehand.page.goto("https://web-scraping.dev");
    await stagehand.act("click on the products link");

    const products = await stagehand.extract({
        instruction: "extract all product names and prices",
        schema: { products: [{ name: "string", price: "string" }] }
    });

    console.log("Products:", products);
    await stagehand.close();

For full documentation, see:
https://scrapfly.io/docs/cloud-browser-api/stagehand
"""

from scrapfly import ScrapflyClient, BrowserConfig

scrapfly = ScrapflyClient(key='__API_KEY__')
cdp_url = scrapfly.cloud_browser(BrowserConfig(proxy_pool='datacenter', os='linux'))
print(f"Use this CDP URL in your Stagehand JS code:\n{cdp_url}")

scrapfly/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -48,6 +48,7 @@
     CrawlerWebhook,
     webhook_from_payload
 )
+from .browser_config import BrowserConfig, ProxyPool, OperatingSystem


 __all__: Tuple[str, ...] = (
@@ -102,4 +103,7 @@
     'CrawlCompletedWebhook',
     'CrawlerWebhook',
     'webhook_from_payload',
+    'BrowserConfig',
+    'ProxyPool',
+    'OperatingSystem',
 )
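ProxyPool and OperatingSystem are exported alongside BrowserConfig. Assuming they are enums whose members mirror the string values used throughout this diff (member names such as DATACENTER and LINUX are guesses, not confirmed by the hunks shown here), the earlier examples could swap the raw strings for them:

from scrapfly import ScrapflyClient, BrowserConfig, ProxyPool, OperatingSystem

scrapfly = ScrapflyClient(key='YOUR_API_KEY')
config = BrowserConfig(
    proxy_pool=ProxyPool.DATACENTER,   # assumed member, equivalent to 'datacenter'
    os=OperatingSystem.LINUX,          # assumed member, equivalent to 'linux'
)
cdp_url = scrapfly.cloud_browser(config)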
