Commit eb744fa (cloud-browser)
Add Cloud Browser support to the SDK. BrowserConfig holds the full parameter surface (proxy_pool, os, country, session, auto_close, timeout, debug, extensions, block_*, screenshot, resolution, target_url, cache, blacklist, unblock, unblock_timeout, browser_brand, byop_proxy) and ScrapflyClient.cloud_browser(config) builds the WebSocket URL ready for Playwright connect_over_cdp(). byop_proxy lets Custom plan users route browser traffic through their own proxy. Supported schemes: http, https, socks5, socks5h, socks5+udp, socks5h+udp. The +udp variants enable HTTP/3 (QUIC) via SOCKS5 UDP ASSOCIATE for providers that implement RFC 1928 §7. See https://scrapfly.io/docs/cloud-browser-api/getting-started
1 parent 754e84e commit eb744fa
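A hedged sketch of the surface described above, combining the parameters named in the commit message with the usage shown in the examples below (the byop_proxy endpoint and credentials are hypothetical; the pool/OS string values follow this diff's examples):

from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='YOUR_API_KEY')

# byop_proxy routes browser traffic through your own proxy (Custom plan).
# The socks5h+udp scheme additionally enables HTTP/3 (QUIC) via SOCKS5
# UDP ASSOCIATE when the proxy implements RFC 1928 §7.
config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
    country='us',
    byop_proxy='socks5h+udp://user:pass@proxy.example.com:1080',  # hypothetical endpoint
)

cdp_url = scrapfly.cloud_browser(config)  # WebSocket URL ready for connect_over_cdp()
with sync_playwright() as p:
    browser = p.chromium.connect_over_cdp(cdp_url)
    browser.close()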

12 files changed

Lines changed: 841 additions & 2 deletions
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
"""
Basic Browser Use connection test with Scrapfly Cloud Browser.

Browser Use connects to Cloud Browser via CDP. The first connection may trigger
a reconnection (this is normal - the Cloud Browser allocates the browser instance
during the initial WebSocket handshake).

Requires: Python 3.11+, browser-use, scrapfly-sdk
"""
import asyncio
from scrapfly import ScrapflyClient, BrowserConfig
from browser_use import Browser, BrowserProfile

scrapfly = ScrapflyClient(
    key='YOUR_API_KEY',
    cloud_browser_host='wss://browser.scrapfly.io',
)

config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)

cdp_url = scrapfly.cloud_browser(config)
print(f"CDP URL: {cdp_url[:80]}...")


async def test_connection():
    browser = Browser(
        browser_profile=BrowserProfile(
            cdp_url=cdp_url,
        )
    )

    # Start the browser session (may reconnect once during allocation)
    await browser.start()
    print("Connected to Cloud Browser")

    # Get a page and navigate
    page = await browser.get_current_page()
    await page.goto('https://web-scraping.dev/products')

    title = await page.title()
    url = page.url
    print(f"Page title: {title}")
    print(f"Page URL: {url}")

    await browser.close()
    print("Browser closed successfully")


asyncio.run(test_connection())
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
#!/bin/bash
# Browser Use CLI with Scrapfly Cloud Browser
#
# The CLI connects to Cloud Browser via CDP and provides interactive
# browser control from the terminal.
#
# Requires: browser-use CLI installed (pip install browser-use)

API_KEY="YOUR_API_KEY"
BROWSER_WS="wss://browser.scrapfly.io?api_key=${API_KEY}&proxy_pool=datacenter&os=linux"

# Open a page in the cloud browser
browser-use --cdp-url "$BROWSER_WS" open https://web-scraping.dev/products

# Get page state (title, URL, clickable elements)
browser-use state

# Click on a product link (by element index from state output)
browser-use click 5

# Take a screenshot
browser-use screenshot product.png

# Type into a search field
browser-use input 3 "web scraping"

# Press Enter
browser-use keys "Enter"

# Close the session (stops billing)
browser-use close
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
"""
Connect Browser Use AI agent to Scrapfly Cloud Browser.

Browser Use controls remote browsers over CDP.
Note: The initial connection may trigger a WebSocket reconnection - this is normal
and handled automatically by browser-use's reconnection logic.

Requirements:
- Python 3.11+
- pip install browser-use scrapfly-sdk langchain-openai
- OPENAI_API_KEY environment variable set
"""
import asyncio
from scrapfly import ScrapflyClient, BrowserConfig
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser, BrowserProfile

scrapfly = ScrapflyClient(
    key='YOUR_API_KEY',
)

# Generate the Cloud Browser CDP endpoint
config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)
cdp_url = scrapfly.cloud_browser(config)


async def run_agent():
    # Connect to Cloud Browser via CDP
    browser = Browser(
        browser_profile=BrowserProfile(
            cdp_url=cdp_url,
        )
    )

    # Create AI agent with natural language task
    agent = Agent(
        task=(
            "Go to https://web-scraping.dev/products and extract all product names and prices. "
            "Return the data as a JSON list."
        ),
        llm=ChatOpenAI(model="gpt-4o"),
        browser=browser,
    )

    result = await agent.run()
    print("Agent result:", result)


asyncio.run(run_agent())
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
"""Connect to Scrapfly Cloud Browser using Playwright (Python)"""
from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='__API_KEY__')

# Configure Cloud Browser connection
browser_config = BrowserConfig(
    proxy_pool='datacenter',
    os='linux',
)

# Get the CDP WebSocket URL
cdp_url = scrapfly.cloud_browser(browser_config)


def run():
    with sync_playwright() as p:
        browser = None
        try:
            # Connect to Cloud Browser
            browser = p.chromium.connect_over_cdp(cdp_url)

            context = browser.contexts[0]
            page = context.pages[0] if context.pages else context.new_page()

            # Navigate and interact
            page.goto('https://web-scraping.dev')
            print('Page title:', page.title())

            # Take a screenshot
            page.screenshot(path='screenshot.png')
        finally:
            if browser:
                browser.close()


run()
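The Browser Use examples in this commit note that the first connection can trigger a reconnection while the browser instance is allocated during the WebSocket handshake. Raw Playwright does not retry on its own, so a minimal retry wrapper (a sketch under that assumption, not part of the SDK) could smooth over a dropped first handshake:

import time
from playwright.sync_api import sync_playwright, Error as PlaywrightError


def connect_with_retry(p, cdp_url, attempts=3, delay=2.0):
    # Retry connect_over_cdp in case the initial handshake drops
    # while the Cloud Browser instance is being allocated.
    last_error = None
    for _ in range(attempts):
        try:
            return p.chromium.connect_over_cdp(cdp_url)
        except PlaywrightError as exc:
            last_error = exc
            time.sleep(delay)
    raise last_error


# Usage:
#   with sync_playwright() as p:
#       browser = connect_with_retry(p, cdp_url)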
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
"""
Connect to Scrapfly Cloud Browser for Selenium users.

Selenium does not natively support remote CDP WebSocket connections.
This example uses the /json/version discovery endpoint + Playwright as the CDP transport.

For direct Playwright usage (recommended), see playwright_connect.py
"""
import requests
from playwright.sync_api import sync_playwright

API_KEY = 'YOUR_API_KEY'

# Discover the WebSocket URL via the standard Chrome DevTools HTTP endpoint
version_info = requests.get(
    'https://browser.scrapfly.io/json/version',
    params={
        'key': API_KEY,
        'proxy_pool': 'datacenter',
        'os': 'linux',
        'country': 'us',
    },
).json()

ws_url = version_info['webSocketDebuggerUrl']
print(f"Browser: {version_info['Browser']}")
print(f"WebSocket URL: {ws_url[:80]}...")

# Connect via Playwright CDP
with sync_playwright() as p:
    browser = p.chromium.connect_over_cdp(ws_url)
    context = browser.contexts[0]
    page = context.pages[0] if context.pages else context.new_page()

    page.goto('https://web-scraping.dev/products')
    print(f"Page title: {page.title()}")

    # Extract products (Selenium-style)
    products = page.locator('.product-thumb').all()
    for product in products[:3]:
        title = product.locator('h3').inner_text()
        print(f"  Product: {title}")

    page.screenshot(path='screenshot.png')
    print("Screenshot saved")

    browser.close()

examples/browser/session_resume.py

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
"""Session Resume: reconnect to an existing Cloud Browser session"""
import time
from scrapfly import ScrapflyClient, BrowserConfig
from playwright.sync_api import sync_playwright

scrapfly = ScrapflyClient(key='__API_KEY__')

SESSION_ID = 'my-persistent-session'

# Configure with session + auto_close=False for persistence
browser_config = BrowserConfig(
    proxy_pool='datacenter',
    session=SESSION_ID,
    auto_close=False,
)

cdp_url = scrapfly.cloud_browser(browser_config)


def first_connection():
    """First connection: navigate and set cookies"""
    print('=== First Connection ===')
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(cdp_url)
        context = browser.contexts[0]
        page = context.new_page()
        page.goto('https://web-scraping.dev')

        # Set a cookie
        context.add_cookies([{
            'name': 'session_token',
            'value': 'abc123',
            'domain': 'web-scraping.dev',
            'path': '/'
        }])

        print('Cookies set, disconnecting...')
        browser.close()  # Disconnects CDP - browser stays alive (auto_close=False)


def second_connection():
    """Second connection: cookies are still there"""
    print('=== Second Connection (Resume) ===')
    with sync_playwright() as p:
        browser = p.chromium.connect_over_cdp(cdp_url)
        context = browser.contexts[0]
        page = context.pages[0] if context.pages else context.new_page()

        # Cookies persist from the previous connection
        cookies = context.cookies('https://web-scraping.dev')
        print('Cookies from previous session:', cookies)

        browser.close()  # Disconnects CDP


first_connection()
time.sleep(2)  # Wait a bit, then reconnect
second_connection()

# Terminate the session when fully done
scrapfly.cloud_browser_session_stop(SESSION_ID)
print(f'Session {SESSION_ID} terminated')
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
"""
Stagehand Cloud Browser Connection

Stagehand is a JavaScript/TypeScript-only library (@browserbase/stagehand)
and cannot be used directly from Python.

You can generate the CDP WebSocket URL from Python and use it in your
JavaScript Stagehand code:

    from scrapfly import ScrapflyClient, BrowserConfig

    scrapfly = ScrapflyClient(key='__API_KEY__')
    cdp_url = scrapfly.cloud_browser(BrowserConfig(proxy_pool='datacenter'))
    print(f"Use this CDP URL in your Stagehand JS code: {cdp_url}")

JavaScript Stagehand example:

    import { Stagehand } from "@browserbase/stagehand";

    const stagehand = new Stagehand({
        env: "BROWSERBASE",
        browserbaseConnectURL: "wss://browser.scrapfly.io?api_key=YOUR_KEY&proxy_pool=datacenter",
    });

    await stagehand.init();
    await stagehand.page.goto("https://web-scraping.dev");
    await stagehand.act("click on the products link");

    const products = await stagehand.extract({
        instruction: "extract all product names and prices",
        schema: { products: [{ name: "string", price: "string" }] }
    });

    console.log("Products:", products);
    await stagehand.close();

For full documentation, see:
https://scrapfly.io/docs/cloud-browser-api/stagehand
"""

from scrapfly import ScrapflyClient, BrowserConfig

scrapfly = ScrapflyClient(key='__API_KEY__')
cdp_url = scrapfly.cloud_browser(BrowserConfig(proxy_pool='datacenter', os='linux'))
print(f"Use this CDP URL in your Stagehand JS code:\n{cdp_url}")

scrapfly/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -48,6 +48,7 @@
     CrawlerWebhook,
     webhook_from_payload
 )
+from .browser_config import BrowserConfig, ProxyPool, OperatingSystem


 __all__: Tuple[str, ...] = (
@@ -102,4 +103,7 @@
     'CrawlCompletedWebhook',
     'CrawlerWebhook',
     'webhook_from_payload',
+    'BrowserConfig',
+    'ProxyPool',
+    'OperatingSystem',
 )
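ProxyPool and OperatingSystem are exported alongside BrowserConfig. Assuming they are enums whose members mirror the string values used throughout this diff (member names such as DATACENTER and LINUX are guesses, not confirmed by the hunks shown here), the earlier examples could swap the raw strings for them:

from scrapfly import ScrapflyClient, BrowserConfig, ProxyPool, OperatingSystem

scrapfly = ScrapflyClient(key='YOUR_API_KEY')
config = BrowserConfig(
    proxy_pool=ProxyPool.DATACENTER,   # assumed member, equivalent to 'datacenter'
    os=OperatingSystem.LINUX,          # assumed member, equivalent to 'linux'
)
cdp_url = scrapfly.cloud_browser(config)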
