55from typing import TYPE_CHECKING , Any , Callable
66
77from pydantic import ValidationError
8+ from yarl import URL
89
910from crawlee import EnqueueStrategy , RequestTransformAction
1011from crawlee ._request import Request , RequestOptions
2223if TYPE_CHECKING :
2324 from collections .abc import AsyncGenerator , Awaitable , Mapping
2425
26+ from playwright .async_api import Page
2527 from typing_extensions import Unpack
2628
2729 from crawlee ._types import BasicCrawlingContext , EnqueueLinksKwargs
@@ -76,6 +78,7 @@ def __init__(
7678 browser_launch_options : Mapping [str , Any ] | None = None ,
7779 browser_new_context_options : Mapping [str , Any ] | None = None ,
7880 headless : bool | None = None ,
81+ use_incognito_pages : bool | None = None ,
7982 ** kwargs : Unpack [BasicCrawlerOptions [PlaywrightCrawlingContext ]],
8083 ) -> None :
8184 """A default constructor.
@@ -94,17 +97,27 @@ def __init__(
9497 This option should not be used if `browser_pool` is provided.
9598 headless: Whether to run the browser in headless mode.
9699 This option should not be used if `browser_pool` is provided.
100+ use_incognito_pages: By default pages share the same browser context. If set to True each page uses its
101+ own context that is destroyed once the page is closed or crashes.
102+ This option should not be used if `browser_pool` is provided.
97103 kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
98104 """
99105 if browser_pool :
100106 # Raise an exception if browser_pool is provided together with other browser-related arguments.
101107 if any (
102108 param is not None
103- for param in (headless , browser_type , browser_launch_options , browser_new_context_options )
109+ for param in (
110+ use_incognito_pages ,
111+ headless ,
112+ browser_type ,
113+ browser_launch_options ,
114+ browser_new_context_options ,
115+ )
104116 ):
105117 raise ValueError (
106- 'You cannot provide `headless`, `browser_type`, `browser_launch_options` or '
107- '`browser_new_context_options` arguments when `browser_pool` is provided.'
118+ 'You cannot provide `headless`, `browser_type`, `browser_launch_options`'
119+ '`browser_new_context_options` or `use_incognito_pages` arguments when '
120+ '`browser_pool` is provided.'
108121 )
109122
110123 # If browser_pool is not provided, create a new instance of BrowserPool with specified arguments.
@@ -114,6 +127,7 @@ def __init__(
114127 browser_type = browser_type ,
115128 browser_launch_options = browser_launch_options ,
116129 browser_new_context_options = browser_new_context_options ,
130+ use_incognito_pages = use_incognito_pages ,
117131 )
118132
119133 self ._browser_pool = browser_pool
@@ -175,6 +189,9 @@ async def _navigate(
175189 infinite_scroll and block_requests).
176190 """
177191 async with context .page :
192+ if context .session :
193+ await self ._set_cookies (context .page , context .request .url , context .session .cookies )
194+
178195 if context .request .headers :
179196 await context .page .set_extra_http_headers (context .request .headers .model_dump ())
180197 # Navigate to the URL and get response.
@@ -186,6 +203,10 @@ async def _navigate(
186203 # Set the loaded URL to the actual URL after redirection.
187204 context .request .loaded_url = context .page .url
188205
206+ if context .session :
207+ cookies = await self ._get_cookies (context .page )
208+ context .session .cookies .update (cookies )
209+
189210 async def enqueue_links (
190211 * ,
191212 selector : str = 'a' ,
@@ -295,3 +316,15 @@ def pre_navigation_hook(self, hook: Callable[[PlaywrightPreNavCrawlingContext],
295316 hook: A coroutine function to be called before each navigation.
296317 """
297318 self ._pre_navigation_hooks .append (hook )
319+
async def _get_cookies(self, page: Page) -> dict[str, str]:
    """Collect the cookies of the page's browser context as a name-to-value mapping.

    Cookies whose name or value is missing or empty are left out. If several cookies
    share a name, the last one returned by the browser context wins.
    """
    jar: dict[str, str] = {}
    for entry in await page.context.cookies():
        # Keep only cookies that carry both a non-empty name and a non-empty value.
        if entry.get('name') and entry.get('value'):
            jar[entry['name']] = entry['value']
    return jar
324+
async def _set_cookies(self, page: Page, url: str, cookies: dict[str, str]) -> None:
    """Add the given cookies to the page's browser context.

    Every cookie is scoped to the host of `url` with the root path `/`.

    Args:
        page: Page whose browser context receives the cookies.
        url: URL whose host is used as the cookie domain.
        cookies: Mapping of cookie names to cookie values.
    """
    # Nothing to add: skip the URL parsing and the call into the browser context.
    if not cookies:
        return

    # The host is the same for every cookie, so resolve it once.
    domain = URL(url).host
    await page.context.add_cookies(
        [{'name': name, 'value': value, 'domain': domain, 'path': '/'} for name, value in cookies.items()]
    )
0 commit comments