scrape_config,client: support proxified_response (raw upstream pass-through)

jjsaunier · jjsaunier · commit bb8ca09d6fdd · 2026-04-08T21:27:15.000Z
ScrapeConfig adds a proxified_response: Optional[bool] field that
serializes to the proxified_response query param. When true, the
scrape API returns the raw upstream response (target's status,
headers, and body) instead of the JSON envelope — that's the
documented "use Scrapfly as an HTTP proxy" mode.

The default ScrapeApiResponse parser would crash on the raw body
because it expects {result, context, config}. To support the
proxified mode, client.scrape() now branches: if proxified_response
is True, it skips _handle_response(), calls
response.raise_for_status(), notifies the reporter with
scrape_api_response=None, and returns the underlying
requests.Response directly. Callers drive it like any HTTP response
and can read X-Scrapfly-* metadata from response.headers
(Api-Cost, Content-Format, Log).

The change is backwards-compatible — customers only opt into the
new return type when they explicitly set proxified_response=True.
diff --git a/scrapfly/client.py b/scrapfly/client.py
@@ -491,6 +491,18 @@ def scrape(self, scrape_config:ScrapeConfig, no_raise:bool=False) -> ScrapeApiRe
             logger.debug('--> %s Scrapping %s' % (scrape_config.method, scrape_config.url))
             request_data = self._scrape_request(scrape_config=scrape_config)
             response = self._http_handler(**request_data)
+
+            if scrape_config.proxified_response is True:
+                # Proxified mode: the API returns the raw upstream response
+                # (target's status, headers, body) instead of the JSON
+                # envelope. Skip ScrapeApiResponse parsing entirely and
+                # return the raw requests.Response so callers can drive
+                # it like any HTTP response. Scrapfly metadata is on the
+                # X-Scrapfly-* headers (Content-Format, Log, Api-Cost).
+                response.raise_for_status()
+                self.reporter.report(scrape_api_response=None)
+                return response
+
             scrape_api_response = self._handle_response(response=response, scrape_config=scrape_config)
 
             self.reporter.report(scrape_api_response=scrape_api_response)
diff --git a/scrapfly/scrape_config.py b/scrapfly/scrape_config.py
@@ -106,6 +106,7 @@ class ScrapeConfig(BaseApiConfig):
     auto_scroll:Optional[bool] = None
     cost_budget:Optional[int] = None
     browser_brand:Optional[str] = None
+    proxified_response:Optional[bool] = None
 
     def __init__(
         self,
@@ -151,7 +152,8 @@ def __init__(
         lang:Optional[List[str]] = None,
         auto_scroll:Optional[bool] = None,
         cost_budget:Optional[int] = None,
-        browser_brand:Optional[str] = None
+        browser_brand:Optional[str] = None,
+        proxified_response:Optional[bool] = None
     ):
         assert(type(url) is str)
 
@@ -205,6 +207,7 @@ def __init__(
         self.auto_scroll = auto_scroll
         self.cost_budget = cost_budget
         self.browser_brand = browser_brand
+        self.proxified_response = proxified_response
 
         if cookies:
             _cookies = []
@@ -262,6 +265,9 @@ def to_api_params(self, key:str) -> Dict:
         if self.cost_budget is not None:
             params['cost_budget'] = self.cost_budget
 
+        if self.proxified_response is not None:
+            params['proxified_response'] = self._bool_to_http(self.proxified_response)
+
         if self.render_js is True:
             params['render_js'] = self._bool_to_http(self.render_js)