Skip to content

Commit bb8ca09

Browse files
committed
scrape_config,client: support proxified_response (raw upstream pass-through)
ScrapeConfig adds a proxified_response: Optional[bool] field that serializes to the proxified_response query param. When it is true, the scrape API returns the raw upstream response (the target's status, headers, and body) instead of the JSON envelope — this is the documented "use Scrapfly as an HTTP proxy" mode. The default ScrapeApiResponse parser would crash on the raw body because it expects the {result, context, config} envelope. To support proxified mode, client.scrape() now branches: if scrape_config.proxified_response is True, it skips _handle_response() and returns the underlying requests.Response directly. In that branch it first calls response.raise_for_status(), so an error status raises instead of being returned, and it still invokes the reporter, passing scrape_api_response=None. Callers use the result like any other HTTP response and can read the X-Scrapfly-* metadata (Api-Cost, Content-Format, Log) from response.headers. The change is backwards-compatible: callers get the new return type only when they explicitly set proxified_response=True.
1 parent a3eae4d commit bb8ca09

2 files changed

Lines changed: 19 additions & 1 deletion

File tree

scrapfly/client.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,18 @@ def scrape(self, scrape_config:ScrapeConfig, no_raise:bool=False) -> ScrapeApiRe
491491
logger.debug('--> %s Scrapping %s' % (scrape_config.method, scrape_config.url))
492492
request_data = self._scrape_request(scrape_config=scrape_config)
493493
response = self._http_handler(**request_data)
494+
495+
if scrape_config.proxified_response is True:
496+
# Proxified mode: the API returns the raw upstream response
497+
# (target's status, headers, body) instead of the JSON
498+
# envelope. Skip ScrapeApiResponse parsing entirely and
499+
# return the raw requests.Response so callers can drive
500+
# it like any HTTP response. Scrapfly metadata is on the
501+
# X-Scrapfly-* headers (Content-Format, Log, Api-Cost).
502+
response.raise_for_status()
503+
self.reporter.report(scrape_api_response=None)
504+
return response
505+
494506
scrape_api_response = self._handle_response(response=response, scrape_config=scrape_config)
495507

496508
self.reporter.report(scrape_api_response=scrape_api_response)

scrapfly/scrape_config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class ScrapeConfig(BaseApiConfig):
106106
auto_scroll:Optional[bool] = None
107107
cost_budget:Optional[int] = None
108108
browser_brand:Optional[str] = None
109+
proxified_response:Optional[bool] = None
109110

110111
def __init__(
111112
self,
@@ -151,7 +152,8 @@ def __init__(
151152
lang:Optional[List[str]] = None,
152153
auto_scroll:Optional[bool] = None,
153154
cost_budget:Optional[int] = None,
154-
browser_brand:Optional[str] = None
155+
browser_brand:Optional[str] = None,
156+
proxified_response:Optional[bool] = None
155157
):
156158
assert(type(url) is str)
157159

@@ -205,6 +207,7 @@ def __init__(
205207
self.auto_scroll = auto_scroll
206208
self.cost_budget = cost_budget
207209
self.browser_brand = browser_brand
210+
self.proxified_response = proxified_response
208211

209212
if cookies:
210213
_cookies = []
@@ -262,6 +265,9 @@ def to_api_params(self, key:str) -> Dict:
262265
if self.cost_budget is not None:
263266
params['cost_budget'] = self.cost_budget
264267

268+
if self.proxified_response is not None:
269+
params['proxified_response'] = self._bool_to_http(self.proxified_response)
270+
265271
if self.render_js is True:
266272
params['render_js'] = self._bool_to_http(self.render_js)
267273

0 commit comments

Comments
 (0)