Skip to content

Commit 908c944

Browse files
authored
fix: restore proxy functionality for PlaywrightCrawler broken in v0.5 (#889)
### Description
- Fix for proxy in PlaywrightCrawler.

### Issues
- Closes: #887

### Testing
- Added a test to verify that the proxy is correctly set for PlaywrightCrawler.
1 parent dea6ca3 commit 908c944

2 files changed

Lines changed: 28 additions & 2 deletions

File tree

src/crawlee/browsers/_playwright_browser_controller.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ async def _create_browser_context(
171171
)
172172

173173
if proxy_info:
174-
if browser_new_context_options['proxy']:
174+
if browser_new_context_options.get('proxy'):
175175
logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
176176

177177
browser_new_context_options['proxy'] = ProxySettings(

tests/unit/crawlers/_playwright/test_playwright_crawler.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
PW_CHROMIUM_HEADLESS_DEFAULT_USER_AGENT,
1919
PW_FIREFOX_HEADLESS_DEFAULT_USER_AGENT,
2020
)
21+
from crawlee.proxy_configuration import ProxyConfiguration
2122

2223
if TYPE_CHECKING:
2324
from yarl import URL
2425

25-
from crawlee.crawlers import PlaywrightCrawlingContext
26+
from crawlee.crawlers import PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
2627

2728

2829
async def test_basic_request(httpbin: URL) -> None:
@@ -188,3 +189,28 @@ async def request_handler(_context: PlaywrightCrawlingContext) -> None:
188189
await crawler.run(['https://example.com', str(httpbin)])
189190

190191
assert mock_hook.call_count == 2
192+
193+
194+
async def test_proxy_set() -> None:
195+
# Configure crawler with proxy settings
196+
proxy_value = 'http://1111:1111'
197+
crawler = PlaywrightCrawler(proxy_configuration=ProxyConfiguration(proxy_urls=[proxy_value]))
198+
199+
handler_data = {}
200+
201+
mock_handler = mock.AsyncMock(return_value=None)
202+
crawler.router.default_handler(mock_handler)
203+
204+
# Use pre_navigation_hook to verify proxy and configure playwright route
205+
@crawler.pre_navigation_hook
206+
async def some_hook(context: PlaywrightPreNavCrawlingContext) -> None:
207+
if context.proxy_info:
208+
# Store information about the used proxy
209+
handler_data['proxy'] = context.proxy_info.url
210+
211+
# Emulate server response to prevent Playwright from making real requests
212+
await context.page.route('**/*', lambda route: route.fulfill(status=200))
213+
214+
await crawler.run(['https://test.com'])
215+
216+
assert handler_data.get('proxy') == proxy_value

0 commit comments

Comments
 (0)