From c71b86541b839024523ec9ffd1e9551010fd6172 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 15:18:12 -0500 Subject: [PATCH 1/3] utils.query: surface unhandled 4xx/5xx and stop mutating the caller's payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related correctness fixes to the shared HTTP wrapper used by every module in the package. * The function only formatted explicit messages for 400, 404, 414, and 500/502/503. Any other status (401, 403, 405, 408, 429, 501, 504, …) fell through and the response was returned as if it had succeeded — callers parsed an HTML error page as RDB or CSV. Add a ``raise_for_status()`` after the explicit branches so every non-success surfaces, while keeping the friendlier messages for the codes we already format. * The body did ``for key, value in payload.items(): payload[key] = to_str(...)``, mutating the caller's dict. List values came back replaced with their stringified joins, breaking any caller that re-used the dict. Build a fresh ``params`` dict in a comprehension and leave the input alone. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/utils.py | 16 +++++++--------- tests/utils_test.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 4aa76a61..36cd0d74 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -163,7 +163,7 @@ def query(url, payload, delimiter=",", ssl_check=True): url: string URL to query payload: dict - query parameters passed to ``requests.get`` + query parameters passed to ``requests.get``. Not mutated. delimiter: string delimiter to use with lists ssl_check: bool @@ -175,17 +175,11 @@ def query(url, payload, delimiter=",", ssl_check=True): string: query response The response from the API query ``requests.get`` function call. """ + params = {key: to_str(value, delimiter) for key, value in payload.items()} - for key, value in payload.items(): - payload[key] = to_str(value, delimiter) - # for index in range(len(payload)): - # key, value = payload[index] - # payload[index] = (key, to_str(value)) - - # define the user agent for the query user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"} - response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check) + response = requests.get(url, params=params, headers=user_agent, verify=ssl_check) if response.status_code == 400: raise ValueError( @@ -218,6 +212,10 @@ def query(url, payload, delimiter=",", ssl_check=True): + f"The service at {response.url} may be down or experiencing issues." ) + # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) + # so callers don't silently receive an HTML error page as if it were data. + response.raise_for_status() + if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/tests/utils_test.py b/tests/utils_test.py index 4cb9b383..a4edc7c2 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -41,6 +41,37 @@ def test_header(self): assert response.status_code == 200 # GET was successful assert "user-agent" in response.request.headers + def test_does_not_mutate_caller_payload(self, requests_mock): + """`query` must not mutate the caller's payload dict. + + Regression: previously the function did + ``payload[key] = to_str(value, delimiter)`` in place, so list + values were overwritten with their stringified joins after the + call returned. + """ + url = "https://example.com/svc" + requests_mock.get(url, text="ok") + payload = {"sites": ["A", "B"], "stateCd": "MD"} + original = {k: v for k, v in payload.items()} + + utils.query(url, payload) + + assert payload == original + assert payload["sites"] is original["sites"] + + @pytest.mark.parametrize("status", [401, 403, 405, 408, 429, 501, 504]) + def test_unhandled_4xx_5xx_raises(self, requests_mock, status): + """`query` must surface every 4xx/5xx, not just the few it formats. + + Regression: codes outside {400, 404, 414, 500, 502, 503} used to + pass through silently — callers received the error body as if + it were data. + """ + url = "https://example.com/svc" + requests_mock.get(url, status_code=status, text="denied") + with pytest.raises(Exception): # noqa: B017 -- HTTPError or ValueError + utils.query(url, {"k": "v"}) + class Test_BaseMetadata: """Tests of BaseMetadata""" From 5b8a6614bf75c0e9cbe3496f94e559d0fd8548f1 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:10:48 -0500 Subject: [PATCH 2/3] Standardize exception type and document return for query() Per copilot review on PR #253: - Re-raise the catch-all 4xx/5xx as ValueError with status/reason/url for parity with the explicit 400/404/414/500/502/503 branches; callers no longer need to handle both ValueError and HTTPError from the same helper. - Update the Returns section to document the actual return type (requests.Response) and the ValueError raise contract. - Narrow the test from pytest.raises(Exception) to pytest.raises( ValueError) with a match on the status code to assert the contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/utils.py | 20 +++++++++++++++++--- tests/utils_test.py | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 36cd0d74..ebc35537 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -172,8 +172,17 @@ def query(url, payload, delimiter=",", ssl_check=True): Returns ------- - string: query response - The response from the API query ``requests.get`` function call. + response : ``requests.Response`` + The response object from the underlying ``requests.get`` call. + + Raises + ------ + ValueError + For any non-success HTTP status (4xx/5xx). The message includes + the status code, reason, and URL. Specific guidance is included + for 400, 404, 414, and 500/502/503 statuses; any other 4xx/5xx + falls through to a generic ValueError so callers don't silently + receive an HTML error page as if it were data. """ params = {key: to_str(value, delimiter) for key, value in payload.items()} @@ -214,7 +223,12 @@ def query(url, payload, delimiter=",", ssl_check=True): # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) # so callers don't silently receive an HTML error page as if it were data. - response.raise_for_status() + # Re-raise as ValueError to keep the exception contract uniform with the + # explicit status branches above. + if not response.ok: + raise ValueError( + f"HTTP {response.status_code} {response.reason} for {response.url}" + ) if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/tests/utils_test.py b/tests/utils_test.py index a4edc7c2..28c610f7 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -69,7 +69,7 @@ def test_unhandled_4xx_5xx_raises(self, requests_mock, status): """ url = "https://example.com/svc" requests_mock.get(url, status_code=status, text="denied") - with pytest.raises(Exception): # noqa: B017 -- HTTPError or ValueError + with pytest.raises(ValueError, match=str(status)): utils.query(url, {"k": "v"}) From 80425a1887f30baccac7f7c1038c014e3d9a9a9d Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 12 May 2026 19:38:24 -0500 Subject: [PATCH 3/3] Trim verbose comments in utils.query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Tighten the Raises section in the docstring from 5 lines to 2; the status code, reason, and URL message contract is the load-bearing promise — the implementation-detail "specific guidance for X" was redundant with the elif branches just below. - Drop the 4-line block comment above the catch-all that restated what the Raises section already says. - In tests, use dict(payload) instead of a dict comprehension shallow copy, and tighten match=str(status) to match=rf"HTTP {status}\b" so the assertion can't be accidentally satisfied by digits appearing elsewhere in the message. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/utils.py | 11 ++--------- tests/utils_test.py | 4 ++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index ebc35537..eb52374b 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -178,11 +178,8 @@ def query(url, payload, delimiter=",", ssl_check=True): Raises ------ ValueError - For any non-success HTTP status (4xx/5xx). The message includes - the status code, reason, and URL. Specific guidance is included - for 400, 404, 414, and 500/502/503 statuses; any other 4xx/5xx - falls through to a generic ValueError so callers don't silently - receive an HTML error page as if it were data. + For any non-success HTTP status (4xx/5xx); the message includes + the status code, reason, and URL. """ params = {key: to_str(value, delimiter) for key, value in payload.items()} @@ -221,10 +218,6 @@ def query(url, payload, delimiter=",", ssl_check=True): + f"The service at {response.url} may be down or experiencing issues." ) - # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) - # so callers don't silently receive an HTML error page as if it were data. - # Re-raise as ValueError to keep the exception contract uniform with the - # explicit status branches above. if not response.ok: raise ValueError( f"HTTP {response.status_code} {response.reason} for {response.url}" diff --git a/tests/utils_test.py b/tests/utils_test.py index 28c610f7..79b0cfd6 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -52,7 +52,7 @@ def test_does_not_mutate_caller_payload(self, requests_mock): url = "https://example.com/svc" requests_mock.get(url, text="ok") payload = {"sites": ["A", "B"], "stateCd": "MD"} - original = {k: v for k, v in payload.items()} + original = dict(payload) utils.query(url, payload) @@ -69,7 +69,7 @@ def test_unhandled_4xx_5xx_raises(self, requests_mock, status): """ url = "https://example.com/svc" requests_mock.get(url, status_code=status, text="denied") - with pytest.raises(ValueError, match=str(status)): + with pytest.raises(ValueError, match=rf"HTTP {status}\b"): utils.query(url, {"k": "v"})