From 9baf002cad4ac823ed5c0197b2b7a584f9a00c22 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 13 Apr 2026 17:53:25 -0500 Subject: [PATCH 01/21] Validate monitoring_location_id format in waterdata functions Passing an integer (e.g. 5129115) or a bare string without an agency prefix (e.g. "dog") to any waterdata function silently wasted an API call and returned empty data. Now all ten public functions that accept monitoring_location_id raise before touching the network: - TypeError if the value is not a string or list of strings - ValueError if any string doesn't match the 'AGENCY-ID' format (e.g. 'USGS-01646500') Closes #188. Co-Authored-By: Claude Sonnet 4.6 --- dataretrieval/waterdata/api.py | 11 +++++++ dataretrieval/waterdata/utils.py | 51 ++++++++++++++++++++++++++++++++ tests/waterdata_test.py | 51 +++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 0e5cfc8c..9f51d99c 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -26,6 +26,7 @@ ) from dataretrieval.waterdata.utils import ( SAMPLES_URL, + _check_monitoring_location_id, _check_profiles, _default_headers, _get_args, @@ -230,6 +231,7 @@ def get_daily( ... last_modified="P7D", ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "daily" output_id = "daily_id" @@ -418,6 +420,7 @@ def get_continuous( ... filter_lang="cql-text", ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "continuous" output_id = "continuous_id" @@ -716,6 +719,7 @@ def get_monitoring_locations( ... properties=["monitoring_location_id", "state_name", "country_name"], ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "monitoring-locations" output_id = "monitoring_location_id" @@ -939,6 +943,7 @@ def get_time_series_metadata( ... begin="1990-01-01/..", ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "time-series-metadata" output_id = "time_series_id" @@ -1366,6 +1371,7 @@ def get_latest_continuous( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "latest-continuous" output_id = "latest_continuous_id" @@ -1562,6 +1568,7 @@ def get_latest_daily( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "latest-daily" output_id = "latest_daily_id" @@ -1752,6 +1759,7 @@ def get_field_measurements( ... time="P20Y", ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "field-measurements" output_id = "field_measurement_id" @@ -2524,6 +2532,7 @@ def get_stats_por( ... ) """ # Build argument dictionary, omitting None values + _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2653,6 +2662,7 @@ def get_stats_date_range( ... ) """ # Build argument dictionary, omitting None values + _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2825,6 +2835,7 @@ def get_channel( ... monitoring_location_id="USGS-02238500", ... ) """ + _check_monitoring_location_id(monitoring_location_id) service = "channel-measurements" output_id = "channel_measurements_id" diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 378b864b..6170ba0c 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1168,6 +1168,57 @@ def _check_profiles( ) +_MONITORING_LOCATION_ID_RE = re.compile(r"^.+-.+$") + + +def _check_monitoring_location_id( + monitoring_location_id: str | list[str] | None, +) -> None: + """Validate the format of a monitoring_location_id value. + + Parameters + ---------- + monitoring_location_id : str, list of str, or None + One or more monitoring location identifiers. + + Raises + ------ + TypeError + If any identifier is not a string (e.g. an integer was passed). + ValueError + If any string identifier does not follow the required + ``'AGENCY-ID'`` format (e.g. ``'USGS-01646500'``). + """ + if monitoring_location_id is None: + return + + if not isinstance(monitoring_location_id, (str, list)): + raise TypeError( + f"monitoring_location_id must be a string or list of strings, " + f"not {type(monitoring_location_id).__name__}. " + f"Expected format: 'AGENCY-ID', e.g., 'USGS-{monitoring_location_id}'." + ) + + ids = ( + [monitoring_location_id] + if isinstance(monitoring_location_id, str) + else monitoring_location_id + ) + + for id_ in ids: + if not isinstance(id_, str): + raise TypeError( + f"monitoring_location_id must be a string or list of strings, " + f"not {type(id_).__name__}. " + f"Expected format: 'AGENCY-ID', e.g., 'USGS-{id_}'." + ) + if not _MONITORING_LOCATION_ID_RE.match(id_): + raise ValueError( + f"Invalid monitoring_location_id: {id_!r}. " + f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." + ) + + def _get_args( local_vars: dict[str, Any], exclude: set[str] | None = None ) -> dict[str, Any]: diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index b53ee296..b6f04a83 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -25,7 +25,7 @@ get_stats_por, get_time_series_metadata, ) -from dataretrieval.waterdata.utils import _check_profiles +from dataretrieval.waterdata.utils import _check_monitoring_location_id, _check_profiles def mock_request(requests_mock, request_url, file_path): @@ -504,3 +504,52 @@ def test_get_channel(): assert df.shape[0] > 470 assert df.shape[1] == 27 # if geopandas installed, 21 columns if not assert "channel_measurements_id" in df.columns + + +class TestCheckMonitoringLocationId: + """Tests for _check_monitoring_location_id input validation. + + Regression tests for GitHub issue #188. + """ + + def test_valid_string(self): + """A correctly formatted string passes without error.""" + _check_monitoring_location_id("USGS-01646500") + + def test_valid_list(self): + """A list of correctly formatted strings passes without error.""" + _check_monitoring_location_id(["USGS-01646500", "USGS-02238500"]) + + def test_none_passes(self): + """None is allowed (optional parameter).""" + _check_monitoring_location_id(None) + + def test_integer_raises_type_error(self): + """An integer ID raises TypeError with a helpful message.""" + with pytest.raises(TypeError, match="not int"): + _check_monitoring_location_id(5129115) + + def test_integer_in_list_raises_type_error(self): + """An integer inside a list raises TypeError.""" + with pytest.raises(TypeError, match="not int"): + _check_monitoring_location_id(["USGS-01646500", 5129115]) + + def test_missing_agency_prefix_raises_value_error(self): + """A string without the AGENCY- prefix raises ValueError.""" + with pytest.raises(ValueError, match="Invalid monitoring_location_id"): + _check_monitoring_location_id("dog") + + def test_bare_site_number_raises_value_error(self): + """A bare site number string (no agency prefix) raises ValueError.""" + with pytest.raises(ValueError, match="Invalid monitoring_location_id"): + _check_monitoring_location_id("01646500") + + def test_get_daily_integer_id_raises(self): + """get_daily raises TypeError before making any network call.""" + with pytest.raises(TypeError): + get_daily(monitoring_location_id=5129115, parameter_code="00060") + + def test_get_daily_malformed_id_raises(self): + """get_daily raises ValueError for a malformed string ID.""" + with pytest.raises(ValueError): + get_daily(monitoring_location_id="dog", parameter_code="00060") From 8d5d7e41c1feead15b7c5a7e51e5eb60e7b15a19 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 00:07:04 -0500 Subject: [PATCH 02/21] Widen _check_monitoring_location_id to accept iterables of strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous `(str, list)` isinstance check rejected legitimate inputs: tuple, pandas.Series, pandas.Index, numpy.ndarray, generators. Pre-fix these would round-trip through requests and either work (tuple) or silently break the URL (numpy/pandas). Post-fix the function: - Accepts any non-string iterable whose elements are strings - Materializes the iterable to a list so downstream comma-join / POST-CQL2 logic in _construct_api_requests keeps working uniformly - Returns the (possibly-normalized) value, so callers reassign: monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - Rejects Mapping (e.g. dict) explicitly — iterating a dict yields keys, which is a footgun Live-verified against api.waterdata.usgs.gov: passing tuple, pd.Series, pd.Index, and np.ndarray of "USGS-01646500" all return 3 rows for the 2024-06-01/2024-06-03 window. Passing pd.Series([1646500]) raises TypeError("monitoring_location_id elements must be strings, not int...") before any network call. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/api.py | 20 ++++++------ dataretrieval/waterdata/utils.py | 50 +++++++++++++++++----------- tests/waterdata_test.py | 56 +++++++++++++++++++++++++++++--- 3 files changed, 93 insertions(+), 33 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 9f51d99c..1f4ae3ac 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -231,7 +231,7 @@ def get_daily( ... last_modified="P7D", ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "daily" output_id = "daily_id" @@ -420,7 +420,7 @@ def get_continuous( ... filter_lang="cql-text", ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "continuous" output_id = "continuous_id" @@ -719,7 +719,7 @@ def get_monitoring_locations( ... properties=["monitoring_location_id", "state_name", "country_name"], ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "monitoring-locations" output_id = "monitoring_location_id" @@ -943,7 +943,7 @@ def get_time_series_metadata( ... begin="1990-01-01/..", ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "time-series-metadata" output_id = "time_series_id" @@ -1371,7 +1371,7 @@ def get_latest_continuous( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "latest-continuous" output_id = "latest_continuous_id" @@ -1568,7 +1568,7 @@ def get_latest_daily( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "latest-daily" output_id = "latest_daily_id" @@ -1759,7 +1759,7 @@ def get_field_measurements( ... time="P20Y", ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "field-measurements" output_id = "field_measurement_id" @@ -2532,7 +2532,7 @@ def get_stats_por( ... ) """ # Build argument dictionary, omitting None values - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2662,7 +2662,7 @@ def get_stats_date_range( ... ) """ # Build argument dictionary, omitting None values - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2835,7 +2835,7 @@ def get_channel( ... monitoring_location_id="USGS-02238500", ... ) """ - _check_monitoring_location_id(monitoring_location_id) + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "channel-measurements" output_id = "channel_measurements_id" diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 6170ba0c..e1e36722 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -4,6 +4,7 @@ import logging import os import re +from collections.abc import Iterable, Mapping from datetime import datetime from typing import Any, get_args from zoneinfo import ZoneInfo @@ -1171,44 +1172,53 @@ def _check_profiles( _MONITORING_LOCATION_ID_RE = re.compile(r"^.+-.+$") -def _check_monitoring_location_id( - monitoring_location_id: str | list[str] | None, -) -> None: - """Validate the format of a monitoring_location_id value. +def _check_monitoring_location_id(monitoring_location_id): + """Validate and normalize a ``monitoring_location_id`` value. Parameters ---------- - monitoring_location_id : str, list of str, or None - One or more monitoring location identifiers. + monitoring_location_id : None, str, or iterable of str + Accepts ``None``, a single string, or any non-string iterable of + strings (``list``, ``tuple``, ``pandas.Series``, ``pandas.Index``, + ``numpy.ndarray``, ...). Iterables are materialized to a ``list`` + so downstream code that branches on ``isinstance(v, list)`` keeps + working. + + Returns + ------- + None, str, or list of str + ``None`` and ``str`` inputs are returned unchanged; non-string + iterables are returned as a ``list``. Raises ------ TypeError - If any identifier is not a string (e.g. an integer was passed). + If the input is not ``None``, a string, or an iterable, or if any + iterable element is not a string. ValueError - If any string identifier does not follow the required - ``'AGENCY-ID'`` format (e.g. ``'USGS-01646500'``). + If any identifier doesn't contain a hyphen separator + (per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``). """ if monitoring_location_id is None: - return + return None - if not isinstance(monitoring_location_id, (str, list)): + if isinstance(monitoring_location_id, str): + ids = [monitoring_location_id] + elif isinstance(monitoring_location_id, Iterable) and not isinstance( + monitoring_location_id, Mapping + ): + ids = list(monitoring_location_id) + else: raise TypeError( - f"monitoring_location_id must be a string or list of strings, " + f"monitoring_location_id must be a string or iterable of strings, " f"not {type(monitoring_location_id).__name__}. " f"Expected format: 'AGENCY-ID', e.g., 'USGS-{monitoring_location_id}'." ) - ids = ( - [monitoring_location_id] - if isinstance(monitoring_location_id, str) - else monitoring_location_id - ) - for id_ in ids: if not isinstance(id_, str): raise TypeError( - f"monitoring_location_id must be a string or list of strings, " + f"monitoring_location_id elements must be strings, " f"not {type(id_).__name__}. " f"Expected format: 'AGENCY-ID', e.g., 'USGS-{id_}'." ) @@ -1218,6 +1228,8 @@ def _check_monitoring_location_id( f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." ) + return monitoring_location_id if isinstance(monitoring_location_id, str) else ids + def _get_args( local_vars: dict[str, Any], exclude: set[str] | None = None diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index b6f04a83..18fe52aa 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -1,6 +1,7 @@ import datetime import sys +import pandas as pd import pytest from pandas import DataFrame @@ -513,16 +514,17 @@ class TestCheckMonitoringLocationId: """ def test_valid_string(self): - """A correctly formatted string passes without error.""" - _check_monitoring_location_id("USGS-01646500") + """A correctly formatted string passes and is returned unchanged.""" + assert _check_monitoring_location_id("USGS-01646500") == "USGS-01646500" def test_valid_list(self): """A list of correctly formatted strings passes without error.""" - _check_monitoring_location_id(["USGS-01646500", "USGS-02238500"]) + ids = ["USGS-01646500", "USGS-02238500"] + assert _check_monitoring_location_id(ids) == ids def test_none_passes(self): """None is allowed (optional parameter).""" - _check_monitoring_location_id(None) + assert _check_monitoring_location_id(None) is None def test_integer_raises_type_error(self): """An integer ID raises TypeError with a helpful message.""" @@ -549,6 +551,52 @@ def test_get_daily_integer_id_raises(self): with pytest.raises(TypeError): get_daily(monitoring_location_id=5129115, parameter_code="00060") + def test_tuple_normalizes_to_list(self): + """A tuple of valid strings is accepted and normalized to list.""" + result = _check_monitoring_location_id(("USGS-01646500", "USGS-02238500")) + assert result == ["USGS-01646500", "USGS-02238500"] + assert isinstance(result, list) + + def test_pandas_series_normalizes_to_list(self): + """A pandas.Series of valid strings is accepted and normalized to list.""" + s = pd.Series(["USGS-01646500", "USGS-02238500"]) + result = _check_monitoring_location_id(s) + assert result == ["USGS-01646500", "USGS-02238500"] + assert isinstance(result, list) + + def test_pandas_index_normalizes_to_list(self): + """A pandas.Index of valid strings is accepted and normalized to list.""" + idx = pd.Index(["USGS-01646500", "USGS-02238500"]) + result = _check_monitoring_location_id(idx) + assert result == ["USGS-01646500", "USGS-02238500"] + assert isinstance(result, list) + + def test_numpy_array_normalizes_to_list(self): + """A numpy.ndarray of valid strings is accepted and normalized to list.""" + import numpy as np + + arr = np.array(["USGS-01646500", "USGS-02238500"]) + result = _check_monitoring_location_id(arr) + assert result == ["USGS-01646500", "USGS-02238500"] + assert isinstance(result, list) + + def test_numpy_int_array_raises_type_error(self): + """An iterable whose elements aren't strings (numpy int array) raises.""" + import numpy as np + + with pytest.raises(TypeError, match="elements must be strings"): + _check_monitoring_location_id(np.array([1, 2, 3])) + + def test_pandas_series_of_ints_raises_type_error(self): + """An iterable whose elements aren't strings (Series of ints) raises.""" + with pytest.raises(TypeError, match="elements must be strings"): + _check_monitoring_location_id(pd.Series([1, 2, 3])) + + def test_dict_raises_type_error(self): + """Mappings are rejected — iterating a dict yields keys, which is a footgun.""" + with pytest.raises(TypeError, match="not dict"): + _check_monitoring_location_id({"USGS-01646500": "site"}) + def test_get_daily_malformed_id_raises(self): """get_daily raises ValueError for a malformed string ID.""" with pytest.raises(ValueError): From 36ca0befda6ddde9b109785b8fd815ceb56d9fba Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 00:10:51 -0500 Subject: [PATCH 03/21] Tidy _check_monitoring_location_id (clearer dispatch, helper, type hints) Internal refactor; behavior unchanged. - Add full type hints: ``str | Iterable[str] | None`` in, ``str | list[str] | None`` out, mirroring the runtime contract. - Split into explicit fast paths instead of wrapping-then-unwrapping: the string case validates and returns directly, eliminating the throwaway ``[monitoring_location_id]`` list and the final ``isinstance(str)`` re-check at return. - Invert the Mapping/Iterable check: reject ``Mapping`` (and non-iterables) up front, then ``list()`` the iterable. Reads more linearly than the compound ``isinstance(Iterable) and not isinstance(Mapping)``. - Extract ``_check_id_format(value)`` for the regex/ValueError pair so the format rule lives in one place and the call sites are one-liners. Tests unchanged; 16 validator tests + full suite still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/utils.py | 50 ++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index e1e36722..96782f54 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1172,29 +1172,32 @@ def _check_profiles( _MONITORING_LOCATION_ID_RE = re.compile(r"^.+-.+$") -def _check_monitoring_location_id(monitoring_location_id): +def _check_monitoring_location_id( + monitoring_location_id: str | Iterable[str] | None, +) -> str | list[str] | None: """Validate and normalize a ``monitoring_location_id`` value. Parameters ---------- monitoring_location_id : None, str, or iterable of str - Accepts ``None``, a single string, or any non-string iterable of - strings (``list``, ``tuple``, ``pandas.Series``, ``pandas.Index``, - ``numpy.ndarray``, ...). Iterables are materialized to a ``list`` - so downstream code that branches on ``isinstance(v, list)`` keeps - working. + ``None``, a single AGENCY-ID string, or any non-string, + non-``Mapping`` iterable of such strings (``list``, ``tuple``, + ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``, ...). + ``Mapping`` types are rejected because iterating a mapping yields + keys, which would be a footgun. Returns ------- None, str, or list of str - ``None`` and ``str`` inputs are returned unchanged; non-string - iterables are returned as a ``list``. + ``None`` and ``str`` are returned unchanged; iterables are + materialized to a ``list`` so downstream code that branches on + ``isinstance(v, list)`` keeps working. Raises ------ TypeError - If the input is not ``None``, a string, or an iterable, or if any - iterable element is not a string. + If the input isn't ``None``, ``str``, or a non-``Mapping`` + iterable; or if any iterable element isn't a string. ValueError If any identifier doesn't contain a hyphen separator (per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``). @@ -1203,18 +1206,19 @@ def _check_monitoring_location_id(monitoring_location_id): return None if isinstance(monitoring_location_id, str): - ids = [monitoring_location_id] - elif isinstance(monitoring_location_id, Iterable) and not isinstance( - monitoring_location_id, Mapping + _check_id_format(monitoring_location_id) + return monitoring_location_id + + if isinstance(monitoring_location_id, Mapping) or not isinstance( + monitoring_location_id, Iterable ): - ids = list(monitoring_location_id) - else: raise TypeError( f"monitoring_location_id must be a string or iterable of strings, " f"not {type(monitoring_location_id).__name__}. " f"Expected format: 'AGENCY-ID', e.g., 'USGS-{monitoring_location_id}'." ) + ids = list(monitoring_location_id) for id_ in ids: if not isinstance(id_, str): raise TypeError( @@ -1222,13 +1226,17 @@ def _check_monitoring_location_id(monitoring_location_id): f"not {type(id_).__name__}. " f"Expected format: 'AGENCY-ID', e.g., 'USGS-{id_}'." ) - if not _MONITORING_LOCATION_ID_RE.match(id_): - raise ValueError( - f"Invalid monitoring_location_id: {id_!r}. " - f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." - ) + _check_id_format(id_) + return ids - return monitoring_location_id if isinstance(monitoring_location_id, str) else ids + +def _check_id_format(value: str) -> None: + """Raise ``ValueError`` if ``value`` is not in ``AGENCY-ID`` format.""" + if not _MONITORING_LOCATION_ID_RE.match(value): + raise ValueError( + f"Invalid monitoring_location_id: {value!r}. " + f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." + ) def _get_args( From b5b7f94775023de53b4eda6c511f473ee2c67cf3 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 08:30:19 -0500 Subject: [PATCH 04/21] Address Copilot review on PR #229 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. TypeError messages now use a fixed example ("USGS-01646500") instead of interpolating the user's value into the suggestion template. The offending value is still surfaced via "(got {value!r})" so the user can see what they actually passed — but mappings, large objects, etc. no longer produce ugly suggestion strings like "USGS-{'k': 'v'}". 2. _MONITORING_LOCATION_ID_RE switched from anchored `^.+-.+$` matched with `re.match` to un-anchored `.+-.+` matched with `re.fullmatch`. Same effective behavior, but `fullmatch` makes the intent explicit and removes the redundant anchor characters. 3. Widened the type annotations on all 10 public waterdata functions that accept `monitoring_location_id` from `str | list[str] | None` to `str | Iterable[str] | None`, matching the runtime contract that accepts tuples, pandas.Series, pandas.Index, numpy.ndarray, etc. Added `from collections.abc import Iterable` to api.py. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/api.py | 21 +++++++++++---------- dataretrieval/waterdata/utils.py | 13 +++++++------ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 1f4ae3ac..6a06def8 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -8,6 +8,7 @@ import json import logging +from collections.abc import Iterable from io import StringIO from typing import get_args from urllib.parse import quote @@ -39,7 +40,7 @@ def get_daily( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, statistic_id: str | list[str] | None = None, properties: list[str] | None = None, @@ -242,7 +243,7 @@ def get_daily( def get_continuous( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, statistic_id: str | list[str] | None = None, properties: list[str] | None = None, @@ -431,7 +432,7 @@ def get_continuous( def get_monitoring_locations( - monitoring_location_id: list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, agency_code: list[str] | None = None, agency_name: list[str] | None = None, monitoring_location_number: list[str] | None = None, @@ -730,7 +731,7 @@ def get_monitoring_locations( def get_time_series_metadata( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, parameter_name: str | list[str] | None = None, properties: str | list[str] | None = None, @@ -1186,7 +1187,7 @@ def get_combined_metadata( def get_latest_continuous( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, statistic_id: str | list[str] | None = None, properties: str | list[str] | None = None, @@ -1382,7 +1383,7 @@ def get_latest_continuous( def get_latest_daily( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, statistic_id: str | list[str] | None = None, properties: str | list[str] | None = None, @@ -1579,7 +1580,7 @@ def get_latest_daily( def get_field_measurements( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | list[str] | None = None, observing_procedure_code: str | list[str] | None = None, properties: list[str] | None = None, @@ -2423,7 +2424,7 @@ def get_stats_por( county_code: str | list[str] | None = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, parent_time_series_id: str | list[str] | None = None, site_type_code: str | list[str] | None = None, @@ -2548,7 +2549,7 @@ def get_stats_date_range( county_code: str | list[str] | None = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, parent_time_series_id: str | list[str] | None = None, site_type_code: str | list[str] | None = None, @@ -2673,7 +2674,7 @@ def get_stats_date_range( def get_channel( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, field_visit_id: str | list[str] | None = None, measurement_number: str | list[str] | None = None, time: str | list[str] | None = None, diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 96782f54..e05a9120 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1169,7 +1169,7 @@ def _check_profiles( ) -_MONITORING_LOCATION_ID_RE = re.compile(r"^.+-.+$") +_MONITORING_LOCATION_ID_RE = re.compile(r".+-.+") def _check_monitoring_location_id( @@ -1214,8 +1214,9 @@ def _check_monitoring_location_id( ): raise TypeError( f"monitoring_location_id must be a string or iterable of strings, " - f"not {type(monitoring_location_id).__name__}. " - f"Expected format: 'AGENCY-ID', e.g., 'USGS-{monitoring_location_id}'." + f"not {type(monitoring_location_id).__name__} " + f"(got {monitoring_location_id!r}). " + f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." ) ids = list(monitoring_location_id) @@ -1223,8 +1224,8 @@ def _check_monitoring_location_id( if not isinstance(id_, str): raise TypeError( f"monitoring_location_id elements must be strings, " - f"not {type(id_).__name__}. " - f"Expected format: 'AGENCY-ID', e.g., 'USGS-{id_}'." + f"not {type(id_).__name__} (got {id_!r}). " + f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." ) _check_id_format(id_) return ids @@ -1232,7 +1233,7 @@ def _check_monitoring_location_id( def _check_id_format(value: str) -> None: """Raise ``ValueError`` if ``value`` is not in ``AGENCY-ID`` format.""" - if not _MONITORING_LOCATION_ID_RE.match(value): + if not _MONITORING_LOCATION_ID_RE.fullmatch(value): raise ValueError( f"Invalid monitoring_location_id: {value!r}. " f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." From df3c108e5a4e269d959710a56efbcea1858d4c27 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 08:50:15 -0500 Subject: [PATCH 05/21] Apply _normalize_str_iterable to every multi-value string parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts the type-and-iterable normalization out of _check_monitoring_location_id into a reusable helper, then wires it into every public waterdata getter at every multi-value string parameter. Closes the gap noted in PR review: previously only `monitoring_location_id` accepted pd.Series / np.ndarray / tuple; other params (parameter_code, statistic_id, state_name, etc.) still silently str-serialized non-list iterables into the request. Changes: - dataretrieval/waterdata/utils.py: new `_normalize_str_iterable(value, param_name)` does the None / str / non-Mapping-Iterable dispatch and per-element type check. `_check_monitoring_location_id` now wraps it and adds the AGENCY-ID hyphen check, which remains monitoring_location_id-specific. - dataretrieval/waterdata/api.py: 153 normalization calls inserted across 11 public functions (get_daily, get_continuous, get_monitoring_locations, get_time_series_metadata, get_latest_continuous, get_latest_daily, get_field_measurements, get_samples, get_stats_por, get_stats_date_range, get_channel). 170 type annotations widened from `str | list[str] | None` / `list[str] | None` to `str | Iterable[str] | None`. Added Iterable import and `_normalize_str_iterable` to imports. Excluded (kept as-is): time-range params (time, last_modified, begin, end, begin_utc, end_utc, datetime) — these have special semantics in _format_api_dates (single-string or two-element range). Out of scope; users who need iterable support there can `.tolist()`. - tests/waterdata_test.py: new TestNormalizeStrIterable class with 10 tests covering str / list / tuple / pd.Series / pd.Index / np.ndarray acceptance, plus int / dict rejection, plus an integration check (mock.patch on get_ogc_data) confirming that passing pd.Series for parameter_code arrives at the inner call as a list, not a stringified Series. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/api.py | 580 ++++++++++++++++++++++--------- dataretrieval/waterdata/utils.py | 101 ++++-- tests/waterdata_test.py | 79 ++++- 3 files changed, 555 insertions(+), 205 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 6a06def8..6cc59db4 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -31,6 +31,7 @@ _check_profiles, _default_headers, _get_args, + _normalize_str_iterable, get_ogc_data, get_stats_data, ) @@ -41,18 +42,18 @@ def get_daily( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - properties: list[str] | None = None, - time_series_id: str | list[str] | None = None, - daily_id: str | list[str] | None = None, - approval_status: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - qualifier: str | list[str] | None = None, - value: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + daily_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, last_modified: str | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -233,6 +234,15 @@ def get_daily( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") + properties = _normalize_str_iterable(properties, "properties") + time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") + daily_id = _normalize_str_iterable(daily_id, "daily_id") + approval_status = _normalize_str_iterable(approval_status, "approval_status") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + qualifier = _normalize_str_iterable(qualifier, "qualifier") + value = _normalize_str_iterable(value, "value") service = "daily" output_id = "daily_id" @@ -244,17 +254,17 @@ def get_daily( def get_continuous( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - properties: list[str] | None = None, - time_series_id: str | list[str] | None = None, - continuous_id: str | list[str] | None = None, - approval_status: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - qualifier: str | list[str] | None = None, - value: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + continuous_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, last_modified: str | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, limit: int | None = None, filter: str | None = None, filter_lang: FILTER_LANG | None = None, @@ -422,6 +432,15 @@ def get_continuous( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") + properties = _normalize_str_iterable(properties, "properties") + time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") + continuous_id = _normalize_str_iterable(continuous_id, "continuous_id") + approval_status = _normalize_str_iterable(approval_status, "approval_status") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + qualifier = _normalize_str_iterable(qualifier, "qualifier") + value = _normalize_str_iterable(value, "value") service = "continuous" output_id = "continuous_id" @@ -433,48 +452,48 @@ def get_continuous( def get_monitoring_locations( monitoring_location_id: str | Iterable[str] | None = None, - agency_code: list[str] | None = None, - agency_name: list[str] | None = None, - monitoring_location_number: list[str] | None = None, - monitoring_location_name: list[str] | None = None, - district_code: list[str] | None = None, - country_code: list[str] | None = None, - country_name: list[str] | None = None, - state_code: list[str] | None = None, - state_name: list[str] | None = None, - county_code: list[str] | None = None, - county_name: list[str] | None = None, - minor_civil_division_code: list[str] | None = None, - site_type_code: list[str] | None = None, - site_type: list[str] | None = None, - hydrologic_unit_code: list[str] | None = None, - basin_code: list[str] | None = None, - altitude: list[str] | None = None, - altitude_accuracy: list[str] | None = None, - altitude_method_code: list[str] | None = None, - altitude_method_name: list[str] | None = None, - vertical_datum: list[str] | None = None, - vertical_datum_name: list[str] | None = None, - horizontal_positional_accuracy_code: list[str] | None = None, - horizontal_positional_accuracy: list[str] | None = None, - horizontal_position_method_code: list[str] | None = None, - horizontal_position_method_name: list[str] | None = None, - original_horizontal_datum: list[str] | None = None, - original_horizontal_datum_name: list[str] | None = None, - drainage_area: list[str] | None = None, - contributing_drainage_area: list[str] | None = None, - time_zone_abbreviation: list[str] | None = None, - uses_daylight_savings: list[str] | None = None, - construction_date: list[str] | None = None, - aquifer_code: list[str] | None = None, - national_aquifer_code: list[str] | None = None, - aquifer_type_code: list[str] | None = None, - well_constructed_depth: list[str] | None = None, - hole_constructed_depth: list[str] | None = None, - depth_source_code: list[str] | None = None, - properties: list[str] | None = None, + agency_code: str | Iterable[str] | None = None, + agency_name: str | Iterable[str] | None = None, + monitoring_location_number: str | Iterable[str] | None = None, + monitoring_location_name: str | Iterable[str] | None = None, + district_code: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + country_name: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + state_name: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, + county_name: str | Iterable[str] | None = None, + minor_civil_division_code: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type: str | Iterable[str] | None = None, + hydrologic_unit_code: str | Iterable[str] | None = None, + basin_code: str | Iterable[str] | None = None, + altitude: str | Iterable[str] | None = None, + altitude_accuracy: str | Iterable[str] | None = None, + altitude_method_code: str | Iterable[str] | None = None, + altitude_method_name: str | Iterable[str] | None = None, + vertical_datum: str | Iterable[str] | None = None, + vertical_datum_name: str | Iterable[str] | None = None, + horizontal_positional_accuracy_code: str | Iterable[str] | None = None, + horizontal_positional_accuracy: str | Iterable[str] | None = None, + horizontal_position_method_code: str | Iterable[str] | None = None, + horizontal_position_method_name: str | Iterable[str] | None = None, + original_horizontal_datum: str | Iterable[str] | None = None, + original_horizontal_datum_name: str | Iterable[str] | None = None, + drainage_area: str | Iterable[str] | None = None, + contributing_drainage_area: str | Iterable[str] | None = None, + time_zone_abbreviation: str | Iterable[str] | None = None, + uses_daylight_savings: str | Iterable[str] | None = None, + construction_date: str | Iterable[str] | None = None, + aquifer_code: str | Iterable[str] | None = None, + national_aquifer_code: str | Iterable[str] | None = None, + aquifer_type_code: str | Iterable[str] | None = None, + well_constructed_depth: str | Iterable[str] | None = None, + hole_constructed_depth: str | Iterable[str] | None = None, + depth_source_code: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -721,6 +740,84 @@ def get_monitoring_locations( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + agency_code = _normalize_str_iterable(agency_code, "agency_code") + agency_name = _normalize_str_iterable(agency_name, "agency_name") + monitoring_location_number = _normalize_str_iterable( + monitoring_location_number, "monitoring_location_number" + ) + monitoring_location_name = _normalize_str_iterable( + monitoring_location_name, "monitoring_location_name" + ) + district_code = _normalize_str_iterable(district_code, "district_code") + country_code = _normalize_str_iterable(country_code, "country_code") + country_name = _normalize_str_iterable(country_name, "country_name") + state_code = _normalize_str_iterable(state_code, "state_code") + state_name = _normalize_str_iterable(state_name, "state_name") + county_code = _normalize_str_iterable(county_code, "county_code") + county_name = _normalize_str_iterable(county_name, "county_name") + minor_civil_division_code = _normalize_str_iterable( + minor_civil_division_code, "minor_civil_division_code" + ) + site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") + site_type = _normalize_str_iterable(site_type, "site_type") + hydrologic_unit_code = _normalize_str_iterable( + hydrologic_unit_code, "hydrologic_unit_code" + ) + basin_code = _normalize_str_iterable(basin_code, "basin_code") + altitude = _normalize_str_iterable(altitude, "altitude") + altitude_accuracy = _normalize_str_iterable(altitude_accuracy, "altitude_accuracy") + altitude_method_code = _normalize_str_iterable( + altitude_method_code, "altitude_method_code" + ) + altitude_method_name = _normalize_str_iterable( + altitude_method_name, "altitude_method_name" + ) + vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum") + vertical_datum_name = _normalize_str_iterable( + vertical_datum_name, "vertical_datum_name" + ) + horizontal_positional_accuracy_code = _normalize_str_iterable( + horizontal_positional_accuracy_code, "horizontal_positional_accuracy_code" + ) + horizontal_positional_accuracy = _normalize_str_iterable( + horizontal_positional_accuracy, "horizontal_positional_accuracy" + ) + horizontal_position_method_code = _normalize_str_iterable( + horizontal_position_method_code, "horizontal_position_method_code" + ) + horizontal_position_method_name = _normalize_str_iterable( + horizontal_position_method_name, "horizontal_position_method_name" + ) + original_horizontal_datum = _normalize_str_iterable( + original_horizontal_datum, "original_horizontal_datum" + ) + original_horizontal_datum_name = _normalize_str_iterable( + original_horizontal_datum_name, "original_horizontal_datum_name" + ) + drainage_area = _normalize_str_iterable(drainage_area, "drainage_area") + contributing_drainage_area = _normalize_str_iterable( + contributing_drainage_area, "contributing_drainage_area" + ) + time_zone_abbreviation = _normalize_str_iterable( + time_zone_abbreviation, "time_zone_abbreviation" + ) + uses_daylight_savings = _normalize_str_iterable( + uses_daylight_savings, "uses_daylight_savings" + ) + construction_date = _normalize_str_iterable(construction_date, "construction_date") + aquifer_code = _normalize_str_iterable(aquifer_code, "aquifer_code") + national_aquifer_code = _normalize_str_iterable( + national_aquifer_code, "national_aquifer_code" + ) + aquifer_type_code = _normalize_str_iterable(aquifer_type_code, "aquifer_type_code") + well_constructed_depth = _normalize_str_iterable( + well_constructed_depth, "well_constructed_depth" + ) + hole_constructed_depth = _normalize_str_iterable( + hole_constructed_depth, "hole_constructed_depth" + ) + depth_source_code = _normalize_str_iterable(depth_source_code, "depth_source_code") + properties = _normalize_str_iterable(properties, "properties") service = "monitoring-locations" output_id = "monitoring_location_id" @@ -732,28 +829,28 @@ def get_monitoring_locations( def get_time_series_metadata( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - parameter_name: str | list[str] | None = None, - properties: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - hydrologic_unit_code: str | list[str] | None = None, - state_name: str | list[str] | None = None, - last_modified: str | list[str] | None = None, - begin: str | list[str] | None = None, - end: str | list[str] | None = None, - begin_utc: str | list[str] | None = None, - end_utc: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - computation_period_identifier: str | list[str] | None = None, - computation_identifier: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + parameter_name: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + hydrologic_unit_code: str | Iterable[str] | None = None, + state_name: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + begin: str | Iterable[str] | None = None, + end: str | Iterable[str] | None = None, + begin_utc: str | Iterable[str] | None = None, + end_utc: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + computation_period_identifier: str | Iterable[str] | None = None, + computation_identifier: str | Iterable[str] | None = None, thresholds: int | None = None, - sublocation_identifier: str | list[str] | None = None, - primary: str | list[str] | None = None, - parent_time_series_id: str | list[str] | None = None, - time_series_id: str | list[str] | None = None, - web_description: str | list[str] | None = None, + sublocation_identifier: str | Iterable[str] | None = None, + primary: str | Iterable[str] | None = None, + parent_time_series_id: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + web_description: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -945,6 +1042,30 @@ def get_time_series_metadata( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + parameter_name = _normalize_str_iterable(parameter_name, "parameter_name") + properties = _normalize_str_iterable(properties, "properties") + statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") + hydrologic_unit_code = _normalize_str_iterable( + hydrologic_unit_code, "hydrologic_unit_code" + ) + state_name = _normalize_str_iterable(state_name, "state_name") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + computation_period_identifier = _normalize_str_iterable( + computation_period_identifier, "computation_period_identifier" + ) + computation_identifier = _normalize_str_iterable( + computation_identifier, "computation_identifier" + ) + sublocation_identifier = _normalize_str_iterable( + sublocation_identifier, "sublocation_identifier" + ) + primary = _normalize_str_iterable(primary, "primary") + parent_time_series_id = _normalize_str_iterable( + parent_time_series_id, "parent_time_series_id" + ) + time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") + web_description = _normalize_str_iterable(web_description, "web_description") service = "time-series-metadata" output_id = "time_series_id" @@ -1188,18 +1309,18 @@ def get_combined_metadata( def get_latest_continuous( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - properties: str | list[str] | None = None, - time_series_id: str | list[str] | None = None, - latest_continuous_id: str | list[str] | None = None, - approval_status: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - qualifier: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + latest_continuous_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, value: int | None = None, - last_modified: str | list[str] | None = None, + last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1373,6 +1494,16 @@ def get_latest_continuous( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") + properties = _normalize_str_iterable(properties, "properties") + time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") + latest_continuous_id = _normalize_str_iterable( + latest_continuous_id, "latest_continuous_id" + ) + approval_status = _normalize_str_iterable(approval_status, "approval_status") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + qualifier = _normalize_str_iterable(qualifier, "qualifier") service = "latest-continuous" output_id = "latest_continuous_id" @@ -1384,18 +1515,18 @@ def get_latest_continuous( def get_latest_daily( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - properties: str | list[str] | None = None, - time_series_id: str | list[str] | None = None, - latest_daily_id: str | list[str] | None = None, - approval_status: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - qualifier: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + latest_daily_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, value: int | None = None, - last_modified: str | list[str] | None = None, + last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1570,6 +1701,14 @@ def get_latest_daily( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") + properties = _normalize_str_iterable(properties, "properties") + time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") + latest_daily_id = _normalize_str_iterable(latest_daily_id, "latest_daily_id") + approval_status = _normalize_str_iterable(approval_status, "approval_status") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + qualifier = _normalize_str_iterable(qualifier, "qualifier") service = "latest-daily" output_id = "latest_daily_id" @@ -1581,20 +1720,20 @@ def get_latest_daily( def get_field_measurements( monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | list[str] | None = None, - observing_procedure_code: str | list[str] | None = None, - properties: list[str] | None = None, - field_visit_id: str | list[str] | None = None, - approval_status: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - qualifier: str | list[str] | None = None, - value: str | list[str] | None = None, - last_modified: str | list[str] | None = None, - observing_procedure: str | list[str] | None = None, - vertical_datum: str | list[str] | None = None, - measuring_agency: str | list[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + observing_procedure_code: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, + field_visit_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + observing_procedure: str | Iterable[str] | None = None, + vertical_datum: str | Iterable[str] | None = None, + measuring_agency: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: str | list[str] | None = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1761,6 +1900,21 @@ def get_field_measurements( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") + observing_procedure_code = _normalize_str_iterable( + observing_procedure_code, "observing_procedure_code" + ) + properties = _normalize_str_iterable(properties, "properties") + field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id") + approval_status = _normalize_str_iterable(approval_status, "approval_status") + unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") + qualifier = _normalize_str_iterable(qualifier, "qualifier") + value = _normalize_str_iterable(value, "value") + observing_procedure = _normalize_str_iterable( + observing_procedure, "observing_procedure" + ) + vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum") + measuring_agency = _normalize_str_iterable(measuring_agency, "measuring_agency") service = "field-measurements" output_id = "field_measurement_id" @@ -2119,28 +2273,28 @@ def get_samples( ssl_check: bool = True, service: SERVICES = "results", profile: PROFILES = "fullphyschem", - activityMediaName: str | list[str] | None = None, + activityMediaName: str | Iterable[str] | None = None, activityStartDateLower: str | None = None, activityStartDateUpper: str | None = None, - activityTypeCode: str | list[str] | None = None, - characteristicGroup: str | list[str] | None = None, - characteristic: str | list[str] | None = None, - characteristicUserSupplied: str | list[str] | None = None, + activityTypeCode: str | Iterable[str] | None = None, + characteristicGroup: str | Iterable[str] | None = None, + characteristic: str | Iterable[str] | None = None, + characteristicUserSupplied: str | Iterable[str] | None = None, boundingBox: list[float] | None = None, - countryFips: str | list[str] | None = None, - stateFips: str | list[str] | None = None, - countyFips: str | list[str] | None = None, - siteTypeCode: str | list[str] | None = None, - siteTypeName: str | list[str] | None = None, - usgsPCode: str | list[str] | None = None, - hydrologicUnit: str | list[str] | None = None, - monitoringLocationIdentifier: str | list[str] | None = None, - organizationIdentifier: str | list[str] | None = None, + countryFips: str | Iterable[str] | None = None, + stateFips: str | Iterable[str] | None = None, + countyFips: str | Iterable[str] | None = None, + siteTypeCode: str | Iterable[str] | None = None, + siteTypeName: str | Iterable[str] | None = None, + usgsPCode: str | Iterable[str] | None = None, + hydrologicUnit: str | Iterable[str] | None = None, + monitoringLocationIdentifier: str | Iterable[str] | None = None, + organizationIdentifier: str | Iterable[str] | None = None, pointLocationLatitude: float | None = None, pointLocationLongitude: float | None = None, pointLocationWithinMiles: float | None = None, - projectIdentifier: str | list[str] | None = None, - recordIdentifierUserSupplied: str | list[str] | None = None, + projectIdentifier: str | Iterable[str] | None = None, + recordIdentifierUserSupplied: str | Iterable[str] | None = None, ) -> tuple[pd.DataFrame, BaseMetadata]: """Search Samples database for USGS water quality data. This is a wrapper function for the Samples database API. All potential @@ -2316,6 +2470,32 @@ def get_samples( ... ) """ + activityMediaName = _normalize_str_iterable(activityMediaName, "activityMediaName") + activityTypeCode = _normalize_str_iterable(activityTypeCode, "activityTypeCode") + characteristicGroup = _normalize_str_iterable( + characteristicGroup, "characteristicGroup" + ) + characteristic = _normalize_str_iterable(characteristic, "characteristic") + characteristicUserSupplied = _normalize_str_iterable( + characteristicUserSupplied, "characteristicUserSupplied" + ) + countryFips = _normalize_str_iterable(countryFips, "countryFips") + stateFips = _normalize_str_iterable(stateFips, "stateFips") + countyFips = _normalize_str_iterable(countyFips, "countyFips") + siteTypeCode = _normalize_str_iterable(siteTypeCode, "siteTypeCode") + siteTypeName = _normalize_str_iterable(siteTypeName, "siteTypeName") + usgsPCode = _normalize_str_iterable(usgsPCode, "usgsPCode") + hydrologicUnit = _normalize_str_iterable(hydrologicUnit, "hydrologicUnit") + monitoringLocationIdentifier = _normalize_str_iterable( + monitoringLocationIdentifier, "monitoringLocationIdentifier" + ) + organizationIdentifier = _normalize_str_iterable( + organizationIdentifier, "organizationIdentifier" + ) + projectIdentifier = _normalize_str_iterable(projectIdentifier, "projectIdentifier") + recordIdentifierUserSupplied = _normalize_str_iterable( + recordIdentifierUserSupplied, "recordIdentifierUserSupplied" + ) _check_profiles(service, profile) @@ -2418,18 +2598,18 @@ def get_samples_summary( def get_stats_por( approval_status: str | None = None, - computation_type: str | list[str] | None = None, - country_code: str | list[str] | None = None, - state_code: str | list[str] | None = None, - county_code: str | list[str] | None = None, + computation_type: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, start_date: str | None = None, end_date: str | None = None, monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, - parent_time_series_id: str | list[str] | None = None, - site_type_code: str | list[str] | None = None, - site_type_name: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, + parent_time_series_id: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type_name: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get day-of-year and month-of-year water data statistics from the @@ -2532,6 +2712,16 @@ def get_stats_por( ... end_date="01-31", ... ) """ + computation_type = _normalize_str_iterable(computation_type, "computation_type") + country_code = _normalize_str_iterable(country_code, "country_code") + state_code = _normalize_str_iterable(state_code, "state_code") + county_code = _normalize_str_iterable(county_code, "county_code") + parent_time_series_id = _normalize_str_iterable( + parent_time_series_id, "parent_time_series_id" + ) + site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") + site_type_name = _normalize_str_iterable(site_type_name, "site_type_name") + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") # Build argument dictionary, omitting None values monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) @@ -2543,18 +2733,18 @@ def get_stats_por( def get_stats_date_range( approval_status: str | None = None, - computation_type: str | list[str] | None = None, - country_code: str | list[str] | None = None, - state_code: str | list[str] | None = None, - county_code: str | list[str] | None = None, + computation_type: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, start_date: str | None = None, end_date: str | None = None, monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, - parent_time_series_id: str | list[str] | None = None, - site_type_code: str | list[str] | None = None, - site_type_name: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, + parent_time_series_id: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type_name: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get monthly and annual water data statistics from the USGS Water Data API. @@ -2662,6 +2852,16 @@ def get_stats_date_range( ... computation_type=["minimum", "maximum"], ... ) """ + computation_type = _normalize_str_iterable(computation_type, "computation_type") + country_code = _normalize_str_iterable(country_code, "country_code") + state_code = _normalize_str_iterable(state_code, "state_code") + county_code = _normalize_str_iterable(county_code, "county_code") + parent_time_series_id = _normalize_str_iterable( + parent_time_series_id, "parent_time_series_id" + ) + site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") + site_type_name = _normalize_str_iterable(site_type_name, "site_type_name") + parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") # Build argument dictionary, omitting None values monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) @@ -2675,30 +2875,30 @@ def get_stats_date_range( def get_channel( monitoring_location_id: str | Iterable[str] | None = None, - field_visit_id: str | list[str] | None = None, - measurement_number: str | list[str] | None = None, - time: str | list[str] | None = None, - channel_name: str | list[str] | None = None, - channel_flow: str | list[str] | None = None, - channel_flow_unit: str | list[str] | None = None, - channel_width: str | list[str] | None = None, - channel_width_unit: str | list[str] | None = None, - channel_area: str | list[str] | None = None, - channel_area_unit: str | list[str] | None = None, - channel_velocity: str | list[str] | None = None, - channel_velocity_unit: str | list[str] | None = None, - channel_location_distance: str | list[str] | None = None, - channel_location_distance_unit: str | list[str] | None = None, - channel_stability: str | list[str] | None = None, - channel_material: str | list[str] | None = None, - channel_evenness: str | list[str] | None = None, - horizontal_velocity_description: str | list[str] | None = None, - vertical_velocity_description: str | list[str] | None = None, - longitudinal_velocity_description: str | list[str] | None = None, - measurement_type: str | list[str] | None = None, - last_modified: str | list[str] | None = None, - channel_measurement_type: str | list[str] | None = None, - properties: list[str] | None = None, + field_visit_id: str | Iterable[str] | None = None, + measurement_number: str | Iterable[str] | None = None, + time: str | Iterable[str] | None = None, + channel_name: str | Iterable[str] | None = None, + channel_flow: str | Iterable[str] | None = None, + channel_flow_unit: str | Iterable[str] | None = None, + channel_width: str | Iterable[str] | None = None, + channel_width_unit: str | Iterable[str] | None = None, + channel_area: str | Iterable[str] | None = None, + channel_area_unit: str | Iterable[str] | None = None, + channel_velocity: str | Iterable[str] | None = None, + channel_velocity_unit: str | Iterable[str] | None = None, + channel_location_distance: str | Iterable[str] | None = None, + channel_location_distance_unit: str | Iterable[str] | None = None, + channel_stability: str | Iterable[str] | None = None, + channel_material: str | Iterable[str] | None = None, + channel_evenness: str | Iterable[str] | None = None, + horizontal_velocity_description: str | Iterable[str] | None = None, + vertical_velocity_description: str | Iterable[str] | None = None, + longitudinal_velocity_description: str | Iterable[str] | None = None, + measurement_type: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + channel_measurement_type: str | Iterable[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -2837,6 +3037,46 @@ def get_channel( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) + field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id") + measurement_number = _normalize_str_iterable( + measurement_number, "measurement_number" + ) + channel_name = _normalize_str_iterable(channel_name, "channel_name") + channel_flow = _normalize_str_iterable(channel_flow, "channel_flow") + channel_flow_unit = _normalize_str_iterable(channel_flow_unit, "channel_flow_unit") + channel_width = _normalize_str_iterable(channel_width, "channel_width") + channel_width_unit = _normalize_str_iterable( + channel_width_unit, "channel_width_unit" + ) + channel_area = _normalize_str_iterable(channel_area, "channel_area") + channel_area_unit = _normalize_str_iterable(channel_area_unit, "channel_area_unit") + channel_velocity = _normalize_str_iterable(channel_velocity, "channel_velocity") + channel_velocity_unit = _normalize_str_iterable( + channel_velocity_unit, "channel_velocity_unit" + ) + channel_location_distance = _normalize_str_iterable( + channel_location_distance, "channel_location_distance" + ) + channel_location_distance_unit = _normalize_str_iterable( + channel_location_distance_unit, "channel_location_distance_unit" + ) + channel_stability = _normalize_str_iterable(channel_stability, "channel_stability") + channel_material = _normalize_str_iterable(channel_material, "channel_material") + channel_evenness = _normalize_str_iterable(channel_evenness, "channel_evenness") + horizontal_velocity_description = _normalize_str_iterable( + horizontal_velocity_description, "horizontal_velocity_description" + ) + vertical_velocity_description = _normalize_str_iterable( + vertical_velocity_description, "vertical_velocity_description" + ) + longitudinal_velocity_description = _normalize_str_iterable( + longitudinal_velocity_description, "longitudinal_velocity_description" + ) + measurement_type = _normalize_str_iterable(measurement_type, "measurement_type") + channel_measurement_type = _normalize_str_iterable( + channel_measurement_type, "channel_measurement_type" + ) + properties = _normalize_str_iterable(properties, "properties") service = "channel-measurements" output_id = "channel_measurements_id" diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index e05a9120..6be5be89 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1172,26 +1172,77 @@ def _check_profiles( _MONITORING_LOCATION_ID_RE = re.compile(r".+-.+") +def _normalize_str_iterable( + value: str | Iterable[str] | None, + param_name: str = "value", +) -> str | list[str] | None: + """Validate and normalize a parameter that accepts a string or iterable of strings. + + Used by every public waterdata getter for multi-value string parameters + (``parameter_code``, ``statistic_id``, ``state_name``, ...) so any + sequence-like input — ``list``, ``tuple``, ``pandas.Series``, + ``pandas.Index``, ``numpy.ndarray``, generators, sets — works at the + public boundary. The downstream ``_construct_api_requests`` branches + on ``isinstance(v, (list, tuple))``, so iterables are materialized to + a ``list`` here. ``Mapping`` types are rejected because iterating a + mapping yields keys, which would be a footgun. + + Parameters + ---------- + value : None, str, or iterable of str + param_name : str, optional + Name of the parameter, used in error messages. Defaults to + ``"value"``. + + Returns + ------- + None, str, or list of str + ``None`` and ``str`` are returned unchanged; non-string iterables + are returned as a ``list``. + + Raises + ------ + TypeError + If the input isn't ``None``, ``str``, or a non-``Mapping`` + iterable; or if any iterable element isn't a string. + """ + if value is None: + return None + if isinstance(value, str): + return value + if isinstance(value, Mapping) or not isinstance(value, Iterable): + raise TypeError( + f"{param_name} must be a string or iterable of strings, " + f"not {type(value).__name__} (got {value!r})." + ) + values = list(value) + for v in values: + if not isinstance(v, str): + raise TypeError( + f"{param_name} elements must be strings, " + f"not {type(v).__name__} (got {v!r})." + ) + return values + + def _check_monitoring_location_id( monitoring_location_id: str | Iterable[str] | None, ) -> str | list[str] | None: """Validate and normalize a ``monitoring_location_id`` value. + Combines :func:`_normalize_str_iterable` with the AGENCY-ID format + check that is unique to ``monitoring_location_id`` (the OGC spec + requires a hyphen separator, e.g. ``USGS-01646500``). + Parameters ---------- monitoring_location_id : None, str, or iterable of str - ``None``, a single AGENCY-ID string, or any non-string, - non-``Mapping`` iterable of such strings (``list``, ``tuple``, - ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``, ...). - ``Mapping`` types are rejected because iterating a mapping yields - keys, which would be a footgun. + See :func:`_normalize_str_iterable`. Each string is additionally + required to match the AGENCY-ID hyphen-separated format. Returns ------- None, str, or list of str - ``None`` and ``str`` are returned unchanged; iterables are - materialized to a ``list`` so downstream code that branches on - ``isinstance(v, list)`` keeps working. Raises ------ @@ -1202,33 +1253,15 @@ def _check_monitoring_location_id( If any identifier doesn't contain a hyphen separator (per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``). """ - if monitoring_location_id is None: + value = _normalize_str_iterable(monitoring_location_id, "monitoring_location_id") + if value is None: return None - - if isinstance(monitoring_location_id, str): - _check_id_format(monitoring_location_id) - return monitoring_location_id - - if isinstance(monitoring_location_id, Mapping) or not isinstance( - monitoring_location_id, Iterable - ): - raise TypeError( - f"monitoring_location_id must be a string or iterable of strings, " - f"not {type(monitoring_location_id).__name__} " - f"(got {monitoring_location_id!r}). " - f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." - ) - - ids = list(monitoring_location_id) - for id_ in ids: - if not isinstance(id_, str): - raise TypeError( - f"monitoring_location_id elements must be strings, " - f"not {type(id_).__name__} (got {id_!r}). " - f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." - ) - _check_id_format(id_) - return ids + if isinstance(value, str): + _check_id_format(value) + else: + for v in value: + _check_id_format(v) + return value def _check_id_format(value: str) -> None: diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 18fe52aa..9c080884 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -26,7 +26,11 @@ get_stats_por, get_time_series_metadata, ) -from dataretrieval.waterdata.utils import _check_monitoring_location_id, _check_profiles +from dataretrieval.waterdata.utils import ( + _check_monitoring_location_id, + _check_profiles, + _normalize_str_iterable, +) def mock_request(requests_mock, request_url, file_path): @@ -601,3 +605,76 @@ def test_get_daily_malformed_id_raises(self): """get_daily raises ValueError for a malformed string ID.""" with pytest.raises(ValueError): get_daily(monitoring_location_id="dog", parameter_code="00060") + + +class TestNormalizeStrIterable: + """Tests for the generic _normalize_str_iterable helper. + + Mirrors TestCheckMonitoringLocationId for the type/iterable contract; + the AGENCY-ID format check is monitoring_location_id-specific and lives + only in the _check_monitoring_location_id wrapper. + """ + + def test_none_passes(self): + assert _normalize_str_iterable(None, "p") is None + + def test_string_returned_unchanged(self): + assert _normalize_str_iterable("00060", "parameter_code") == "00060" + # Note: no hyphen requirement here — that's monitoring_location_id-specific. + assert _normalize_str_iterable("dog", "parameter_code") == "dog" + + def test_list_returned_unchanged(self): + assert _normalize_str_iterable(["00060", "00010"], "p") == ["00060", "00010"] + + def test_tuple_normalizes_to_list(self): + result = _normalize_str_iterable(("00060", "00010"), "p") + assert result == ["00060", "00010"] + assert isinstance(result, list) + + def test_pandas_series_normalizes_to_list(self): + result = _normalize_str_iterable(pd.Series(["00060", "00010"]), "p") + assert result == ["00060", "00010"] + assert isinstance(result, list) + + def test_numpy_array_normalizes_to_list(self): + import numpy as np + + result = _normalize_str_iterable(np.array(["00060", "00010"]), "p") + assert result == ["00060", "00010"] + assert isinstance(result, list) + + def test_int_raises_type_error(self): + with pytest.raises(TypeError, match="parameter_code must be a string"): + _normalize_str_iterable(5129115, "parameter_code") + + def test_int_in_iterable_raises_type_error(self): + with pytest.raises(TypeError, match="parameter_code elements must be strings"): + _normalize_str_iterable(["00060", 5129115], "parameter_code") + + def test_dict_raises_type_error(self): + with pytest.raises(TypeError, match="not dict"): + _normalize_str_iterable({"00060": "discharge"}, "parameter_code") + + def test_get_daily_parameter_code_as_series(self): + """Wiring check: pd.Series for ``parameter_code`` arrives at the inner + call as a list. + + Regression for the gap PR #229 originally left on every multi-value + parameter other than ``monitoring_location_id``. Pre-fix, the Series + was passed through to ``requests`` which str-serialized it into the + URL (or POST body). Post-fix, ``_normalize_str_iterable`` materializes + it to ``list`` at the function boundary. + """ + from unittest import mock as _mock + + with _mock.patch("dataretrieval.waterdata.api.get_ogc_data") as fake: + fake.return_value = (pd.DataFrame(), _mock.MagicMock(spec=[])) + get_daily( + monitoring_location_id="USGS-05427718", + parameter_code=pd.Series(["00060", "00010"]), + ) + # _get_args(locals()) packs kwargs and passes them as `args` to + # get_ogc_data; the first positional argument is the args dict. + args_dict = fake.call_args[0][0] + assert args_dict["parameter_code"] == ["00060", "00010"] + assert isinstance(args_dict["parameter_code"], list) From e8c8e88376bbe095e5a156dd36e0d23ec8bc7149 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 09:01:28 -0500 Subject: [PATCH 06/21] Polish from /simplify review Three small follow-ups to c206c0c addressing review findings: 1. Restore the AGENCY-ID hint in _check_monitoring_location_id's TypeError. The refactor through _normalize_str_iterable accidentally dropped the trailing "Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." that the original wrapper carried. Catch+re-raise so monitoring_location_id keeps its helpful error while the generic helper stays generic. Pinned with a new assertion in test_integer_raises_type_error. 2. Document the date-range exclusion in _normalize_str_iterable's docstring. `time`, `last_modified`, `begin`, `end`, `datetime` bypass this helper because _format_api_dates handles their single-string-or-range semantics inside _construct_api_requests; the exclusion lived only in the prior commit message. 3. Single-pass validation. The helper previously did `values = list(value)` followed by a separate `for v in values` isinstance loop. Folded into one loop that builds the list while validating per-element. Plus tests: hoist `from unittest import mock` to module level (matches the rest of the repo's test files) instead of importing it inside the test body. No behavior change for valid inputs. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/utils.py | 28 +++++++++++++++++++++------- tests/waterdata_test.py | 14 ++++++++------ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 6be5be89..ef71c353 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1181,12 +1181,17 @@ def _normalize_str_iterable( Used by every public waterdata getter for multi-value string parameters (``parameter_code``, ``statistic_id``, ``state_name``, ...) so any sequence-like input — ``list``, ``tuple``, ``pandas.Series``, - ``pandas.Index``, ``numpy.ndarray``, generators, sets — works at the - public boundary. The downstream ``_construct_api_requests`` branches - on ``isinstance(v, (list, tuple))``, so iterables are materialized to - a ``list`` here. ``Mapping`` types are rejected because iterating a + ``pandas.Index``, ``numpy.ndarray``, generators — works at the public + boundary. The downstream ``_construct_api_requests`` branches on + ``isinstance(v, (list, tuple))``, so iterables are materialized to a + ``list`` here. ``Mapping`` types are rejected because iterating a mapping yields keys, which would be a footgun. + Date-range params (``time``, ``last_modified``, ``begin``, ``end``, + ``datetime``) deliberately bypass this helper; their single-string-or- + two-element-range semantics are handled by ``_format_api_dates`` inside + ``_construct_api_requests``. + Parameters ---------- value : None, str, or iterable of str @@ -1215,13 +1220,14 @@ def _normalize_str_iterable( f"{param_name} must be a string or iterable of strings, " f"not {type(value).__name__} (got {value!r})." ) - values = list(value) - for v in values: + values: list[str] = [] + for v in value: if not isinstance(v, str): raise TypeError( f"{param_name} elements must be strings, " f"not {type(v).__name__} (got {v!r})." ) + values.append(v) return values @@ -1253,7 +1259,15 @@ def _check_monitoring_location_id( If any identifier doesn't contain a hyphen separator (per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``). """ - value = _normalize_str_iterable(monitoring_location_id, "monitoring_location_id") + try: + value = _normalize_str_iterable( + monitoring_location_id, "monitoring_location_id" + ) + except TypeError as exc: + # Re-raise with the AGENCY-ID hint the generic helper doesn't carry. + raise TypeError( + f"{exc} Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'." + ) from None if value is None: return None if isinstance(value, str): diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 9c080884..fbf9c705 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -1,5 +1,6 @@ import datetime import sys +from unittest import mock import pandas as pd import pytest @@ -531,9 +532,12 @@ def test_none_passes(self): assert _check_monitoring_location_id(None) is None def test_integer_raises_type_error(self): - """An integer ID raises TypeError with a helpful message.""" - with pytest.raises(TypeError, match="not int"): + """An integer ID raises TypeError with a helpful AGENCY-ID hint.""" + with pytest.raises(TypeError, match="not int") as exc_info: _check_monitoring_location_id(5129115) + # The wrapper appends the AGENCY-ID format hint that the generic + # helper alone doesn't carry. + assert "USGS-01646500" in str(exc_info.value) def test_integer_in_list_raises_type_error(self): """An integer inside a list raises TypeError.""" @@ -665,10 +669,8 @@ def test_get_daily_parameter_code_as_series(self): URL (or POST body). Post-fix, ``_normalize_str_iterable`` materializes it to ``list`` at the function boundary. """ - from unittest import mock as _mock - - with _mock.patch("dataretrieval.waterdata.api.get_ogc_data") as fake: - fake.return_value = (pd.DataFrame(), _mock.MagicMock(spec=[])) + with mock.patch("dataretrieval.waterdata.api.get_ogc_data") as fake: + fake.return_value = (pd.DataFrame(), mock.MagicMock(spec=[])) get_daily( monitoring_location_id="USGS-05427718", parameter_code=pd.Series(["00060", "00010"]), From 71823ade644deb55335545f7e70b0933144b7c7f Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 09:26:06 -0500 Subject: [PATCH 07/21] Centralize string-iterable normalization in _get_args (-153 LOC) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Copilot's review #5 (collapse mechanical per-param calls) and #1/#2 along the way: - `_get_args` now normalizes every multi-value string param it sees, using: * a tiny ``_NO_NORMALIZE_PARAMS`` set for the few names that need explicit bypass (``monitoring_location_id``, validated separately; ``time``/``last_modified``/``begin``/``end``/``datetime``, which can contain ``pd.NaT``/None and are parsed by ``_format_api_dates``); * runtime type detection for the rest: scalar non-string knobs (``limit``, ``ssl_check``, ``convert_type``, ``skip_geometry``, ...) and ``list[float]`` params (``bbox``, ``boundingBox``) pass through automatically without listing them. - Strip 153 per-function ``_normalize_str_iterable(...)`` assignments from ``dataretrieval/waterdata/api.py`` and drop the now-unused import. Net: -240 LOC in api.py. - Tighten ``_MONITORING_LOCATION_ID_RE`` from ``.+-.+`` to ``[^-\s]+-[^-\s]+`` (Copilot #2). Now rejects values with leading/ trailing whitespace or multiple hyphens, which used to pass and then silently return 0 rows from the API. - Fix the ``_normalize_str_iterable`` docstring claim "Used by every public waterdata getter" — now accurate ("from ``_get_args`` for every multi-value string parameter on every waterdata getter that uses ``_get_args``"; ``get_nearest_continuous`` and a few others don't). 26 normalizer/validator tests still pass; 267 + 2 skipped + 4 deselected in the full suite (the 4 deselected are flaky live-API tests that 502 intermittently — unrelated). Ruff lint + format clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/api.py | 240 ------------------------------- dataretrieval/waterdata/utils.py | 70 +++++++-- 2 files changed, 55 insertions(+), 255 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 6cc59db4..181893e7 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -31,7 +31,6 @@ _check_profiles, _default_headers, _get_args, - _normalize_str_iterable, get_ogc_data, get_stats_data, ) @@ -234,15 +233,6 @@ def get_daily( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") - properties = _normalize_str_iterable(properties, "properties") - time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") - daily_id = _normalize_str_iterable(daily_id, "daily_id") - approval_status = _normalize_str_iterable(approval_status, "approval_status") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - qualifier = _normalize_str_iterable(qualifier, "qualifier") - value = _normalize_str_iterable(value, "value") service = "daily" output_id = "daily_id" @@ -432,15 +422,6 @@ def get_continuous( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") - properties = _normalize_str_iterable(properties, "properties") - time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") - continuous_id = _normalize_str_iterable(continuous_id, "continuous_id") - approval_status = _normalize_str_iterable(approval_status, "approval_status") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - qualifier = _normalize_str_iterable(qualifier, "qualifier") - value = _normalize_str_iterable(value, "value") service = "continuous" output_id = "continuous_id" @@ -740,84 +721,6 @@ def get_monitoring_locations( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - agency_code = _normalize_str_iterable(agency_code, "agency_code") - agency_name = _normalize_str_iterable(agency_name, "agency_name") - monitoring_location_number = _normalize_str_iterable( - monitoring_location_number, "monitoring_location_number" - ) - monitoring_location_name = _normalize_str_iterable( - monitoring_location_name, "monitoring_location_name" - ) - district_code = _normalize_str_iterable(district_code, "district_code") - country_code = _normalize_str_iterable(country_code, "country_code") - country_name = _normalize_str_iterable(country_name, "country_name") - state_code = _normalize_str_iterable(state_code, "state_code") - state_name = _normalize_str_iterable(state_name, "state_name") - county_code = _normalize_str_iterable(county_code, "county_code") - county_name = _normalize_str_iterable(county_name, "county_name") - minor_civil_division_code = _normalize_str_iterable( - minor_civil_division_code, "minor_civil_division_code" - ) - site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") - site_type = _normalize_str_iterable(site_type, "site_type") - hydrologic_unit_code = _normalize_str_iterable( - hydrologic_unit_code, "hydrologic_unit_code" - ) - basin_code = _normalize_str_iterable(basin_code, "basin_code") - altitude = _normalize_str_iterable(altitude, "altitude") - altitude_accuracy = _normalize_str_iterable(altitude_accuracy, "altitude_accuracy") - altitude_method_code = _normalize_str_iterable( - altitude_method_code, "altitude_method_code" - ) - altitude_method_name = _normalize_str_iterable( - altitude_method_name, "altitude_method_name" - ) - vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum") - vertical_datum_name = _normalize_str_iterable( - vertical_datum_name, "vertical_datum_name" - ) - horizontal_positional_accuracy_code = _normalize_str_iterable( - horizontal_positional_accuracy_code, "horizontal_positional_accuracy_code" - ) - horizontal_positional_accuracy = _normalize_str_iterable( - horizontal_positional_accuracy, "horizontal_positional_accuracy" - ) - horizontal_position_method_code = _normalize_str_iterable( - horizontal_position_method_code, "horizontal_position_method_code" - ) - horizontal_position_method_name = _normalize_str_iterable( - horizontal_position_method_name, "horizontal_position_method_name" - ) - original_horizontal_datum = _normalize_str_iterable( - original_horizontal_datum, "original_horizontal_datum" - ) - original_horizontal_datum_name = _normalize_str_iterable( - original_horizontal_datum_name, "original_horizontal_datum_name" - ) - drainage_area = _normalize_str_iterable(drainage_area, "drainage_area") - contributing_drainage_area = _normalize_str_iterable( - contributing_drainage_area, "contributing_drainage_area" - ) - time_zone_abbreviation = _normalize_str_iterable( - time_zone_abbreviation, "time_zone_abbreviation" - ) - uses_daylight_savings = _normalize_str_iterable( - uses_daylight_savings, "uses_daylight_savings" - ) - construction_date = _normalize_str_iterable(construction_date, "construction_date") - aquifer_code = _normalize_str_iterable(aquifer_code, "aquifer_code") - national_aquifer_code = _normalize_str_iterable( - national_aquifer_code, "national_aquifer_code" - ) - aquifer_type_code = _normalize_str_iterable(aquifer_type_code, "aquifer_type_code") - well_constructed_depth = _normalize_str_iterable( - well_constructed_depth, "well_constructed_depth" - ) - hole_constructed_depth = _normalize_str_iterable( - hole_constructed_depth, "hole_constructed_depth" - ) - depth_source_code = _normalize_str_iterable(depth_source_code, "depth_source_code") - properties = _normalize_str_iterable(properties, "properties") service = "monitoring-locations" output_id = "monitoring_location_id" @@ -1042,30 +945,6 @@ def get_time_series_metadata( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - parameter_name = _normalize_str_iterable(parameter_name, "parameter_name") - properties = _normalize_str_iterable(properties, "properties") - statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") - hydrologic_unit_code = _normalize_str_iterable( - hydrologic_unit_code, "hydrologic_unit_code" - ) - state_name = _normalize_str_iterable(state_name, "state_name") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - computation_period_identifier = _normalize_str_iterable( - computation_period_identifier, "computation_period_identifier" - ) - computation_identifier = _normalize_str_iterable( - computation_identifier, "computation_identifier" - ) - sublocation_identifier = _normalize_str_iterable( - sublocation_identifier, "sublocation_identifier" - ) - primary = _normalize_str_iterable(primary, "primary") - parent_time_series_id = _normalize_str_iterable( - parent_time_series_id, "parent_time_series_id" - ) - time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") - web_description = _normalize_str_iterable(web_description, "web_description") service = "time-series-metadata" output_id = "time_series_id" @@ -1494,16 +1373,6 @@ def get_latest_continuous( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") - properties = _normalize_str_iterable(properties, "properties") - time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") - latest_continuous_id = _normalize_str_iterable( - latest_continuous_id, "latest_continuous_id" - ) - approval_status = _normalize_str_iterable(approval_status, "approval_status") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - qualifier = _normalize_str_iterable(qualifier, "qualifier") service = "latest-continuous" output_id = "latest_continuous_id" @@ -1701,14 +1570,6 @@ def get_latest_daily( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - statistic_id = _normalize_str_iterable(statistic_id, "statistic_id") - properties = _normalize_str_iterable(properties, "properties") - time_series_id = _normalize_str_iterable(time_series_id, "time_series_id") - latest_daily_id = _normalize_str_iterable(latest_daily_id, "latest_daily_id") - approval_status = _normalize_str_iterable(approval_status, "approval_status") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - qualifier = _normalize_str_iterable(qualifier, "qualifier") service = "latest-daily" output_id = "latest_daily_id" @@ -1900,21 +1761,6 @@ def get_field_measurements( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") - observing_procedure_code = _normalize_str_iterable( - observing_procedure_code, "observing_procedure_code" - ) - properties = _normalize_str_iterable(properties, "properties") - field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id") - approval_status = _normalize_str_iterable(approval_status, "approval_status") - unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure") - qualifier = _normalize_str_iterable(qualifier, "qualifier") - value = _normalize_str_iterable(value, "value") - observing_procedure = _normalize_str_iterable( - observing_procedure, "observing_procedure" - ) - vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum") - measuring_agency = _normalize_str_iterable(measuring_agency, "measuring_agency") service = "field-measurements" output_id = "field_measurement_id" @@ -2470,32 +2316,6 @@ def get_samples( ... ) """ - activityMediaName = _normalize_str_iterable(activityMediaName, "activityMediaName") - activityTypeCode = _normalize_str_iterable(activityTypeCode, "activityTypeCode") - characteristicGroup = _normalize_str_iterable( - characteristicGroup, "characteristicGroup" - ) - characteristic = _normalize_str_iterable(characteristic, "characteristic") - characteristicUserSupplied = _normalize_str_iterable( - characteristicUserSupplied, "characteristicUserSupplied" - ) - countryFips = _normalize_str_iterable(countryFips, "countryFips") - stateFips = _normalize_str_iterable(stateFips, "stateFips") - countyFips = _normalize_str_iterable(countyFips, "countyFips") - siteTypeCode = _normalize_str_iterable(siteTypeCode, "siteTypeCode") - siteTypeName = _normalize_str_iterable(siteTypeName, "siteTypeName") - usgsPCode = _normalize_str_iterable(usgsPCode, "usgsPCode") - hydrologicUnit = _normalize_str_iterable(hydrologicUnit, "hydrologicUnit") - monitoringLocationIdentifier = _normalize_str_iterable( - monitoringLocationIdentifier, "monitoringLocationIdentifier" - ) - organizationIdentifier = _normalize_str_iterable( - organizationIdentifier, "organizationIdentifier" - ) - projectIdentifier = _normalize_str_iterable(projectIdentifier, "projectIdentifier") - recordIdentifierUserSupplied = _normalize_str_iterable( - recordIdentifierUserSupplied, "recordIdentifierUserSupplied" - ) _check_profiles(service, profile) @@ -2712,16 +2532,6 @@ def get_stats_por( ... end_date="01-31", ... ) """ - computation_type = _normalize_str_iterable(computation_type, "computation_type") - country_code = _normalize_str_iterable(country_code, "country_code") - state_code = _normalize_str_iterable(state_code, "state_code") - county_code = _normalize_str_iterable(county_code, "county_code") - parent_time_series_id = _normalize_str_iterable( - parent_time_series_id, "parent_time_series_id" - ) - site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") - site_type_name = _normalize_str_iterable(site_type_name, "site_type_name") - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") # Build argument dictionary, omitting None values monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) @@ -2852,16 +2662,6 @@ def get_stats_date_range( ... computation_type=["minimum", "maximum"], ... ) """ - computation_type = _normalize_str_iterable(computation_type, "computation_type") - country_code = _normalize_str_iterable(country_code, "country_code") - state_code = _normalize_str_iterable(state_code, "state_code") - county_code = _normalize_str_iterable(county_code, "county_code") - parent_time_series_id = _normalize_str_iterable( - parent_time_series_id, "parent_time_series_id" - ) - site_type_code = _normalize_str_iterable(site_type_code, "site_type_code") - site_type_name = _normalize_str_iterable(site_type_name, "site_type_name") - parameter_code = _normalize_str_iterable(parameter_code, "parameter_code") # Build argument dictionary, omitting None values monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) @@ -3037,46 +2837,6 @@ def get_channel( ... ) """ monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) - field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id") - measurement_number = _normalize_str_iterable( - measurement_number, "measurement_number" - ) - channel_name = _normalize_str_iterable(channel_name, "channel_name") - channel_flow = _normalize_str_iterable(channel_flow, "channel_flow") - channel_flow_unit = _normalize_str_iterable(channel_flow_unit, "channel_flow_unit") - channel_width = _normalize_str_iterable(channel_width, "channel_width") - channel_width_unit = _normalize_str_iterable( - channel_width_unit, "channel_width_unit" - ) - channel_area = _normalize_str_iterable(channel_area, "channel_area") - channel_area_unit = _normalize_str_iterable(channel_area_unit, "channel_area_unit") - channel_velocity = _normalize_str_iterable(channel_velocity, "channel_velocity") - channel_velocity_unit = _normalize_str_iterable( - channel_velocity_unit, "channel_velocity_unit" - ) - channel_location_distance = _normalize_str_iterable( - channel_location_distance, "channel_location_distance" - ) - channel_location_distance_unit = _normalize_str_iterable( - channel_location_distance_unit, "channel_location_distance_unit" - ) - channel_stability = _normalize_str_iterable(channel_stability, "channel_stability") - channel_material = _normalize_str_iterable(channel_material, "channel_material") - channel_evenness = _normalize_str_iterable(channel_evenness, "channel_evenness") - horizontal_velocity_description = _normalize_str_iterable( - horizontal_velocity_description, "horizontal_velocity_description" - ) - vertical_velocity_description = _normalize_str_iterable( - vertical_velocity_description, "vertical_velocity_description" - ) - longitudinal_velocity_description = _normalize_str_iterable( - longitudinal_velocity_description, "longitudinal_velocity_description" - ) - measurement_type = _normalize_str_iterable(measurement_type, "measurement_type") - channel_measurement_type = _normalize_str_iterable( - channel_measurement_type, "channel_measurement_type" - ) - properties = _normalize_str_iterable(properties, "properties") service = "channel-measurements" output_id = "channel_measurements_id" diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index ef71c353..d8ad3e33 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1169,7 +1169,30 @@ def _check_profiles( ) -_MONITORING_LOCATION_ID_RE = re.compile(r".+-.+") +_MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+") + + +# Parameter names skipped by ``_get_args``'s string-iterable normalization. +# Scalar non-string knobs (``limit``, ``ssl_check``, …) and ``list[float]`` +# params (``bbox``, ``boundingBox``) are detected by *runtime type* and pass +# through automatically. The names below need explicit listing because their +# values *are* string-iterables but have separate handling downstream: +# +# * ``monitoring_location_id`` — validated by +# ``_check_monitoring_location_id`` at the public-function entry. +# * Date-range params (``time``, ``last_modified``, ``begin``, ``end``, +# ``datetime``) — support ``pd.NaT``/``None`` half-bounded endpoints and +# interval/duration strings; parsing happens in ``_format_api_dates``. +_NO_NORMALIZE_PARAMS = frozenset( + { + "monitoring_location_id", + "time", + "last_modified", + "begin", + "end", + "datetime", + } +) def _normalize_str_iterable( @@ -1178,19 +1201,21 @@ def _normalize_str_iterable( ) -> str | list[str] | None: """Validate and normalize a parameter that accepts a string or iterable of strings. - Used by every public waterdata getter for multi-value string parameters - (``parameter_code``, ``statistic_id``, ``state_name``, ...) so any - sequence-like input — ``list``, ``tuple``, ``pandas.Series``, - ``pandas.Index``, ``numpy.ndarray``, generators — works at the public - boundary. The downstream ``_construct_api_requests`` branches on - ``isinstance(v, (list, tuple))``, so iterables are materialized to a - ``list`` here. ``Mapping`` types are rejected because iterating a - mapping yields keys, which would be a footgun. + Called from ``_get_args`` for every multi-value string parameter on + every waterdata getter that uses ``_get_args`` (every OGC/Samples + function in ``dataretrieval/waterdata/api.py``). Accepts ``list``, + ``tuple``, ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``, + generators — anything iterable whose elements are strings. The + downstream ``_construct_api_requests`` branches on ``isinstance(v, + (list, tuple))``, so iterables are materialized to a ``list`` here. + ``Mapping`` types are rejected because iterating a mapping yields + keys, which would be a footgun. Date-range params (``time``, ``last_modified``, ``begin``, ``end``, - ``datetime``) deliberately bypass this helper; their single-string-or- - two-element-range semantics are handled by ``_format_api_dates`` inside - ``_construct_api_requests``. + ``datetime``, ...) deliberately bypass this helper via + ``_NO_NORMALIZE_PARAMS``; their single-string-or-two-element-range + semantics (including ``pd.NaT``/``None`` half-bounded endpoints) are + handled by ``_format_api_dates`` inside ``_construct_api_requests``. Parameters ---------- @@ -1313,6 +1338,21 @@ def _get_args( if exclude: to_exclude.update(exclude) - return { - k: v for k, v in local_vars.items() if k not in to_exclude and v is not None - } + args: dict[str, Any] = {} + for k, v in local_vars.items(): + if k in to_exclude or v is None: + continue + if k in _NO_NORMALIZE_PARAMS or isinstance(v, str): + args[k] = v + continue + if not isinstance(v, Iterable): + # Scalar non-string knob (bool / int / float) — pass through. + args[k] = v + continue + if isinstance(v, (list, tuple)) and v and not isinstance(v[0], str): + # list[float] / list[int] (e.g. bbox) — pass through. + args[k] = v + continue + # String-iterable: validate elements and materialize to list. + args[k] = _normalize_str_iterable(v, k) + return args From 0fd5730db652de0217978ca39ab1a2b7f09ccea2 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 09:27:05 -0500 Subject: [PATCH 08/21] _format_api_dates: materialize iterable inputs (Copilot #4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `time`/`datetime`/`last_modified`/`begin`/`end` were typed as `str | Iterable[str] | None`, but the implementation used `len(...)` and subscripting — generators and other non-Sequence iterables would have raised at runtime, contradicting the annotation. Add a single `list(...)` materialization line right after the str wrap, so any iterable (pandas.Series, numpy.ndarray, generators, sets) flows through cleanly. The half-bounded NaT/None range form is preserved. Verified by passing list / tuple / Series / generator / `[pd.NaT, ...]` through and getting the expected formatted output in each case. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index d8ad3e33..0ebb80f0 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -230,6 +230,10 @@ def _format_api_dates( # Convert single string to list for uniform processing if isinstance(datetime_input, str): datetime_input = [datetime_input] + elif not isinstance(datetime_input, (list, tuple)): + # Materialize any other iterable (pandas.Series, numpy.ndarray, + # generator, ...) so the len()/subscript operations below work. + datetime_input = list(datetime_input) # Check for null or all NA and return None if all(pd.isna(dt) or dt == "" or dt is None for dt in datetime_input): From 7c32beae78c793a60f251f3661af9e8144f23d66 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 09:34:53 -0500 Subject: [PATCH 09/21] Extract _DATE_RANGE_PARAMS; trim docstrings/comments from /simplify Three small follow-ups to the centralization: 1. Extract `_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"})` once at module level. `_construct_api_requests` previously defined the same set twice (`single_params` for POST/GET routing, `time_periods` for the `_format_api_dates` call); the new `_NO_NORMALIZE_PARAMS` overlapped on the same five names. All three now reuse `_DATE_RANGE_PARAMS`. A future date param means one edit, not three. `_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"}`. 2. Trim `_normalize_str_iterable` docstring from ~37 lines to ~20: drop the over-narration of callers and downstream branching; keep the contract (accepted shapes, return shape, raises). 3. Tighten the `_NO_NORMALIZE_PARAMS` comment to one short paragraph (was 11 lines) and inline the four-branch pass-through cascade in `_get_args` into a single boolean `if ... or ... or ... or ...:` so the per-branch noise comments drop away. Behavior unchanged. 26 normalizer/validator + 22 waterdata_utils tests pass; full suite 267 passed + 2 skipped + 4 deselected (flaky live-API 502s); ruff lint + format clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/utils.py | 91 +++++++++++--------------------- 1 file changed, 31 insertions(+), 60 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 0ebb80f0..94acefd8 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -144,6 +144,13 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s # admits time-only forms like ``PT36H``. _DURATION_RE = re.compile(r"^[Pp]T?\d") +# OGC API parameters that carry a date/datetime value (single string, +# two-element range, or interval/duration string) rather than a multi-value +# string list. Used by ``_construct_api_requests`` to keep them out of the +# POST/CQL2 multi-value path and to route them through ``_format_api_dates``, +# and by ``_NO_NORMALIZE_PARAMS`` to bypass string-iterable normalization. +_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"}) + def _parse_datetime(value: str) -> datetime | None: """Parse a single datetime string against the supported formats. @@ -434,14 +441,11 @@ def _construct_api_requests( """ service_url = f"{OGC_API_URL}/collections/{service}/items" - # Single parameters can only have one value - single_params = {"datetime", "last_modified", "begin", "end", "time"} - # Identify which parameters should be included in the POST content body post_params = { k: v for k, v in kwargs.items() - if k not in single_params and isinstance(v, (list, tuple)) and len(v) > 1 + if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1 } # Everything else goes into the params dictionary for the URL @@ -457,8 +461,7 @@ def _construct_api_requests( POST = bool(post_params) # Convert dates to ISO08601 format - time_periods = {"last_modified", "datetime", "time", "begin", "end"} - for i in time_periods: + for i in _DATE_RANGE_PARAMS: if i in params: dates = service == "daily" and i != "last_modified" params[i] = _format_api_dates(params[i], date=dates) @@ -1176,63 +1179,35 @@ def _check_profiles( _MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+") -# Parameter names skipped by ``_get_args``'s string-iterable normalization. -# Scalar non-string knobs (``limit``, ``ssl_check``, …) and ``list[float]`` -# params (``bbox``, ``boundingBox``) are detected by *runtime type* and pass -# through automatically. The names below need explicit listing because their -# values *are* string-iterables but have separate handling downstream: -# -# * ``monitoring_location_id`` — validated by -# ``_check_monitoring_location_id`` at the public-function entry. -# * Date-range params (``time``, ``last_modified``, ``begin``, ``end``, -# ``datetime``) — support ``pd.NaT``/``None`` half-bounded endpoints and -# interval/duration strings; parsing happens in ``_format_api_dates``. -_NO_NORMALIZE_PARAMS = frozenset( - { - "monitoring_location_id", - "time", - "last_modified", - "begin", - "end", - "datetime", - } -) +# Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``. +# Scalar non-string knobs and ``list[float]`` params are detected by runtime +# type; only string-iterable-shaped params with special handling need to be +# named here: ``monitoring_location_id`` (validated separately) and the date- +# range params (which may contain ``pd.NaT``/None or interval strings). +_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"} def _normalize_str_iterable( value: str | Iterable[str] | None, param_name: str = "value", ) -> str | list[str] | None: - """Validate and normalize a parameter that accepts a string or iterable of strings. - - Called from ``_get_args`` for every multi-value string parameter on - every waterdata getter that uses ``_get_args`` (every OGC/Samples - function in ``dataretrieval/waterdata/api.py``). Accepts ``list``, - ``tuple``, ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``, - generators — anything iterable whose elements are strings. The - downstream ``_construct_api_requests`` branches on ``isinstance(v, - (list, tuple))``, so iterables are materialized to a ``list`` here. - ``Mapping`` types are rejected because iterating a mapping yields - keys, which would be a footgun. - - Date-range params (``time``, ``last_modified``, ``begin``, ``end``, - ``datetime``, ...) deliberately bypass this helper via - ``_NO_NORMALIZE_PARAMS``; their single-string-or-two-element-range - semantics (including ``pd.NaT``/``None`` half-bounded endpoints) are - handled by ``_format_api_dates`` inside ``_construct_api_requests``. + """Validate that ``value`` is None, a string, or an iterable of strings. + + Non-string iterables (``list``, ``tuple``, ``pandas.Series``, + ``pandas.Index``, ``numpy.ndarray``, generators) are materialized to a + ``list`` so downstream code that branches on ``isinstance(v, (list, + tuple))`` keeps working. ``Mapping`` types are rejected because + iterating a mapping yields keys, not values. Parameters ---------- value : None, str, or iterable of str param_name : str, optional - Name of the parameter, used in error messages. Defaults to - ``"value"``. + Used in error messages. Defaults to ``"value"``. Returns ------- None, str, or list of str - ``None`` and ``str`` are returned unchanged; non-string iterables - are returned as a ``list``. Raises ------ @@ -1346,17 +1321,13 @@ def _get_args( for k, v in local_vars.items(): if k in to_exclude or v is None: continue - if k in _NO_NORMALIZE_PARAMS or isinstance(v, str): - args[k] = v - continue - if not isinstance(v, Iterable): - # Scalar non-string knob (bool / int / float) — pass through. + if ( + k in _NO_NORMALIZE_PARAMS + or isinstance(v, str) + or not isinstance(v, Iterable) + or (isinstance(v, (list, tuple)) and v and not isinstance(v[0], str)) + ): args[k] = v - continue - if isinstance(v, (list, tuple)) and v and not isinstance(v[0], str): - # list[float] / list[int] (e.g. bbox) — pass through. - args[k] = v - continue - # String-iterable: validate elements and materialize to list. - args[k] = _normalize_str_iterable(v, k) + else: + args[k] = _normalize_str_iterable(v, k) return args From 749f72eadf086ece18f9c882f903ab705b973df5 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 10:01:39 -0500 Subject: [PATCH 10/21] Add StringFilter/StringList aliases; fix Copilot bugs Adopt named type aliases so the 180 signatures across api.py read as semantic intent ("a StringFilter / StringList / DateRange") instead of the mechanical "str | Iterable[str] | None" busy-work. Plus four correctness fixes from the latest Copilot pass. types.py: StringFilter = Optional[Union[str, Iterable[str]]] Multi-value filter: str OR iterable of str. The runtime normalizes iterables to list inside _get_args. StringList = Optional[Iterable[str]] Comma-joined list of property names. Excludes single str at the type level because ",".join(str) would iterate characters. api.py: - 172 `str | Iterable[str] | None` -> `StringFilter` - 8 `properties: ... | None` -> `properties: StringList` - Drop the now-unused `from collections.abc import Iterable` Correctness fixes: 1. _format_api_dates: handle `None` up front. The new `list(...)` materialization for Series/ndarray/generator support crashed on None even though the signature/docstring promised acceptance. (Copilot #1) 2. _get_args: add `_LIST_ONLY_STR_PARAMS = {"properties"}` and wrap stray single-string input into a one-element list, so `",".join(properties)` downstream stays safe. (Copilot #3) 3. _construct_api_requests: `if bbox:` -> `if bbox is not None and len(bbox) > 0`. The truthy check raised ValueError when bbox was a numpy.ndarray with >1 element. Use len() instead of truthy. (Copilot #2) 4. _NO_NORMALIZE_PARAMS: add `bbox`, `boundingBox`. These are list[float] params; the previous runtime-type heuristic in _get_args handled list/tuple of floats but a numpy.ndarray of floats would have been routed through `_normalize_str_iterable` and rejected as "elements must be strings". (Copilot #2) Full suite 267 passed + 2 skipped + 4 deselected (flaky live-API 502s); ruff lint + format clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/api.py | 363 ++++++++++++++++--------------- dataretrieval/waterdata/types.py | 19 +- dataretrieval/waterdata/utils.py | 34 ++- 3 files changed, 227 insertions(+), 189 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 181893e7..b42af094 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -8,7 +8,6 @@ import json import logging -from collections.abc import Iterable from io import StringIO from typing import get_args from urllib.parse import quote @@ -24,6 +23,8 @@ METADATA_COLLECTIONS, PROFILES, SERVICES, + StringFilter, + StringList, ) from dataretrieval.waterdata.utils import ( SAMPLES_URL, @@ -40,19 +41,19 @@ def get_daily( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - statistic_id: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - time_series_id: str | Iterable[str] | None = None, - daily_id: str | Iterable[str] | None = None, - approval_status: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - qualifier: str | Iterable[str] | None = None, - value: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + statistic_id: StringFilter = None, + properties: StringList = None, + time_series_id: StringFilter = None, + daily_id: StringFilter = None, + approval_status: StringFilter = None, + unit_of_measure: StringFilter = None, + qualifier: StringFilter = None, + value: StringFilter = None, last_modified: str | None = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -243,18 +244,18 @@ def get_daily( def get_continuous( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - statistic_id: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - time_series_id: str | Iterable[str] | None = None, - continuous_id: str | Iterable[str] | None = None, - approval_status: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - qualifier: str | Iterable[str] | None = None, - value: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + statistic_id: StringFilter = None, + properties: StringList = None, + time_series_id: StringFilter = None, + continuous_id: StringFilter = None, + approval_status: StringFilter = None, + unit_of_measure: StringFilter = None, + qualifier: StringFilter = None, + value: StringFilter = None, last_modified: str | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, limit: int | None = None, filter: str | None = None, filter_lang: FILTER_LANG | None = None, @@ -432,49 +433,49 @@ def get_continuous( def get_monitoring_locations( - monitoring_location_id: str | Iterable[str] | None = None, - agency_code: str | Iterable[str] | None = None, - agency_name: str | Iterable[str] | None = None, - monitoring_location_number: str | Iterable[str] | None = None, - monitoring_location_name: str | Iterable[str] | None = None, - district_code: str | Iterable[str] | None = None, - country_code: str | Iterable[str] | None = None, - country_name: str | Iterable[str] | None = None, - state_code: str | Iterable[str] | None = None, - state_name: str | Iterable[str] | None = None, - county_code: str | Iterable[str] | None = None, - county_name: str | Iterable[str] | None = None, - minor_civil_division_code: str | Iterable[str] | None = None, - site_type_code: str | Iterable[str] | None = None, - site_type: str | Iterable[str] | None = None, - hydrologic_unit_code: str | Iterable[str] | None = None, - basin_code: str | Iterable[str] | None = None, - altitude: str | Iterable[str] | None = None, - altitude_accuracy: str | Iterable[str] | None = None, - altitude_method_code: str | Iterable[str] | None = None, - altitude_method_name: str | Iterable[str] | None = None, - vertical_datum: str | Iterable[str] | None = None, - vertical_datum_name: str | Iterable[str] | None = None, - horizontal_positional_accuracy_code: str | Iterable[str] | None = None, - horizontal_positional_accuracy: str | Iterable[str] | None = None, - horizontal_position_method_code: str | Iterable[str] | None = None, - horizontal_position_method_name: str | Iterable[str] | None = None, - original_horizontal_datum: str | Iterable[str] | None = None, - original_horizontal_datum_name: str | Iterable[str] | None = None, - drainage_area: str | Iterable[str] | None = None, - contributing_drainage_area: str | Iterable[str] | None = None, - time_zone_abbreviation: str | Iterable[str] | None = None, - uses_daylight_savings: str | Iterable[str] | None = None, - construction_date: str | Iterable[str] | None = None, - aquifer_code: str | Iterable[str] | None = None, - national_aquifer_code: str | Iterable[str] | None = None, - aquifer_type_code: str | Iterable[str] | None = None, - well_constructed_depth: str | Iterable[str] | None = None, - hole_constructed_depth: str | Iterable[str] | None = None, - depth_source_code: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + agency_code: StringFilter = None, + agency_name: StringFilter = None, + monitoring_location_number: StringFilter = None, + monitoring_location_name: StringFilter = None, + district_code: StringFilter = None, + country_code: StringFilter = None, + country_name: StringFilter = None, + state_code: StringFilter = None, + state_name: StringFilter = None, + county_code: StringFilter = None, + county_name: StringFilter = None, + minor_civil_division_code: StringFilter = None, + site_type_code: StringFilter = None, + site_type: StringFilter = None, + hydrologic_unit_code: StringFilter = None, + basin_code: StringFilter = None, + altitude: StringFilter = None, + altitude_accuracy: StringFilter = None, + altitude_method_code: StringFilter = None, + altitude_method_name: StringFilter = None, + vertical_datum: StringFilter = None, + vertical_datum_name: StringFilter = None, + horizontal_positional_accuracy_code: StringFilter = None, + horizontal_positional_accuracy: StringFilter = None, + horizontal_position_method_code: StringFilter = None, + horizontal_position_method_name: StringFilter = None, + original_horizontal_datum: StringFilter = None, + original_horizontal_datum_name: StringFilter = None, + drainage_area: StringFilter = None, + contributing_drainage_area: StringFilter = None, + time_zone_abbreviation: StringFilter = None, + uses_daylight_savings: StringFilter = None, + construction_date: StringFilter = None, + aquifer_code: StringFilter = None, + national_aquifer_code: StringFilter = None, + aquifer_type_code: StringFilter = None, + well_constructed_depth: StringFilter = None, + hole_constructed_depth: StringFilter = None, + depth_source_code: StringFilter = None, + properties: StringList = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -731,29 +732,29 @@ def get_monitoring_locations( def get_time_series_metadata( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - parameter_name: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - statistic_id: str | Iterable[str] | None = None, - hydrologic_unit_code: str | Iterable[str] | None = None, - state_name: str | Iterable[str] | None = None, - last_modified: str | Iterable[str] | None = None, - begin: str | Iterable[str] | None = None, - end: str | Iterable[str] | None = None, - begin_utc: str | Iterable[str] | None = None, - end_utc: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - computation_period_identifier: str | Iterable[str] | None = None, - computation_identifier: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + parameter_name: StringFilter = None, + properties: StringList = None, + statistic_id: StringFilter = None, + hydrologic_unit_code: StringFilter = None, + state_name: StringFilter = None, + last_modified: StringFilter = None, + begin: StringFilter = None, + end: StringFilter = None, + begin_utc: StringFilter = None, + end_utc: StringFilter = None, + unit_of_measure: StringFilter = None, + computation_period_identifier: StringFilter = None, + computation_identifier: StringFilter = None, thresholds: int | None = None, - sublocation_identifier: str | Iterable[str] | None = None, - primary: str | Iterable[str] | None = None, - parent_time_series_id: str | Iterable[str] | None = None, - time_series_id: str | Iterable[str] | None = None, - web_description: str | Iterable[str] | None = None, + sublocation_identifier: StringFilter = None, + primary: StringFilter = None, + parent_time_series_id: StringFilter = None, + time_series_id: StringFilter = None, + web_description: StringFilter = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1187,19 +1188,19 @@ def get_combined_metadata( def get_latest_continuous( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - statistic_id: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - time_series_id: str | Iterable[str] | None = None, - latest_continuous_id: str | Iterable[str] | None = None, - approval_status: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - qualifier: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + statistic_id: StringFilter = None, + properties: StringList = None, + time_series_id: StringFilter = None, + latest_continuous_id: StringFilter = None, + approval_status: StringFilter = None, + unit_of_measure: StringFilter = None, + qualifier: StringFilter = None, value: int | None = None, - last_modified: str | Iterable[str] | None = None, + last_modified: StringFilter = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1383,19 +1384,19 @@ def get_latest_continuous( def get_latest_daily( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - statistic_id: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - time_series_id: str | Iterable[str] | None = None, - latest_daily_id: str | Iterable[str] | None = None, - approval_status: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - qualifier: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + statistic_id: StringFilter = None, + properties: StringList = None, + time_series_id: StringFilter = None, + latest_daily_id: StringFilter = None, + approval_status: StringFilter = None, + unit_of_measure: StringFilter = None, + qualifier: StringFilter = None, value: int | None = None, - last_modified: str | Iterable[str] | None = None, + last_modified: StringFilter = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1580,21 +1581,21 @@ def get_latest_daily( def get_field_measurements( - monitoring_location_id: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, - observing_procedure_code: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, - field_visit_id: str | Iterable[str] | None = None, - approval_status: str | Iterable[str] | None = None, - unit_of_measure: str | Iterable[str] | None = None, - qualifier: str | Iterable[str] | None = None, - value: str | Iterable[str] | None = None, - last_modified: str | Iterable[str] | None = None, - observing_procedure: str | Iterable[str] | None = None, - vertical_datum: str | Iterable[str] | None = None, - measuring_agency: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + parameter_code: StringFilter = None, + observing_procedure_code: StringFilter = None, + properties: StringList = None, + field_visit_id: StringFilter = None, + approval_status: StringFilter = None, + unit_of_measure: StringFilter = None, + qualifier: StringFilter = None, + value: StringFilter = None, + last_modified: StringFilter = None, + observing_procedure: StringFilter = None, + vertical_datum: StringFilter = None, + measuring_agency: StringFilter = None, skip_geometry: bool | None = None, - time: str | Iterable[str] | None = None, + time: StringFilter = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -2119,28 +2120,28 @@ def get_samples( ssl_check: bool = True, service: SERVICES = "results", profile: PROFILES = "fullphyschem", - activityMediaName: str | Iterable[str] | None = None, + activityMediaName: StringFilter = None, activityStartDateLower: str | None = None, activityStartDateUpper: str | None = None, - activityTypeCode: str | Iterable[str] | None = None, - characteristicGroup: str | Iterable[str] | None = None, - characteristic: str | Iterable[str] | None = None, - characteristicUserSupplied: str | Iterable[str] | None = None, + activityTypeCode: StringFilter = None, + characteristicGroup: StringFilter = None, + characteristic: StringFilter = None, + characteristicUserSupplied: StringFilter = None, boundingBox: list[float] | None = None, - countryFips: str | Iterable[str] | None = None, - stateFips: str | Iterable[str] | None = None, - countyFips: str | Iterable[str] | None = None, - siteTypeCode: str | Iterable[str] | None = None, - siteTypeName: str | Iterable[str] | None = None, - usgsPCode: str | Iterable[str] | None = None, - hydrologicUnit: str | Iterable[str] | None = None, - monitoringLocationIdentifier: str | Iterable[str] | None = None, - organizationIdentifier: str | Iterable[str] | None = None, + countryFips: StringFilter = None, + stateFips: StringFilter = None, + countyFips: StringFilter = None, + siteTypeCode: StringFilter = None, + siteTypeName: StringFilter = None, + usgsPCode: StringFilter = None, + hydrologicUnit: StringFilter = None, + monitoringLocationIdentifier: StringFilter = None, + organizationIdentifier: StringFilter = None, pointLocationLatitude: float | None = None, pointLocationLongitude: float | None = None, pointLocationWithinMiles: float | None = None, - projectIdentifier: str | Iterable[str] | None = None, - recordIdentifierUserSupplied: str | Iterable[str] | None = None, + projectIdentifier: StringFilter = None, + recordIdentifierUserSupplied: StringFilter = None, ) -> tuple[pd.DataFrame, BaseMetadata]: """Search Samples database for USGS water quality data. This is a wrapper function for the Samples database API. All potential @@ -2418,18 +2419,18 @@ def get_samples_summary( def get_stats_por( approval_status: str | None = None, - computation_type: str | Iterable[str] | None = None, - country_code: str | Iterable[str] | None = None, - state_code: str | Iterable[str] | None = None, - county_code: str | Iterable[str] | None = None, + computation_type: StringFilter = None, + country_code: StringFilter = None, + state_code: StringFilter = None, + county_code: StringFilter = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, page_size: int = 1000, - parent_time_series_id: str | Iterable[str] | None = None, - site_type_code: str | Iterable[str] | None = None, - site_type_name: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, + parent_time_series_id: StringFilter = None, + site_type_code: StringFilter = None, + site_type_name: StringFilter = None, + parameter_code: StringFilter = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get day-of-year and month-of-year water data statistics from the @@ -2543,18 +2544,18 @@ def get_stats_por( def get_stats_date_range( approval_status: str | None = None, - computation_type: str | Iterable[str] | None = None, - country_code: str | Iterable[str] | None = None, - state_code: str | Iterable[str] | None = None, - county_code: str | Iterable[str] | None = None, + computation_type: StringFilter = None, + country_code: StringFilter = None, + state_code: StringFilter = None, + county_code: StringFilter = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, page_size: int = 1000, - parent_time_series_id: str | Iterable[str] | None = None, - site_type_code: str | Iterable[str] | None = None, - site_type_name: str | Iterable[str] | None = None, - parameter_code: str | Iterable[str] | None = None, + parent_time_series_id: StringFilter = None, + site_type_code: StringFilter = None, + site_type_name: StringFilter = None, + parameter_code: StringFilter = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get monthly and annual water data statistics from the USGS Water Data API. @@ -2674,31 +2675,31 @@ def get_stats_date_range( def get_channel( - monitoring_location_id: str | Iterable[str] | None = None, - field_visit_id: str | Iterable[str] | None = None, - measurement_number: str | Iterable[str] | None = None, - time: str | Iterable[str] | None = None, - channel_name: str | Iterable[str] | None = None, - channel_flow: str | Iterable[str] | None = None, - channel_flow_unit: str | Iterable[str] | None = None, - channel_width: str | Iterable[str] | None = None, - channel_width_unit: str | Iterable[str] | None = None, - channel_area: str | Iterable[str] | None = None, - channel_area_unit: str | Iterable[str] | None = None, - channel_velocity: str | Iterable[str] | None = None, - channel_velocity_unit: str | Iterable[str] | None = None, - channel_location_distance: str | Iterable[str] | None = None, - channel_location_distance_unit: str | Iterable[str] | None = None, - channel_stability: str | Iterable[str] | None = None, - channel_material: str | Iterable[str] | None = None, - channel_evenness: str | Iterable[str] | None = None, - horizontal_velocity_description: str | Iterable[str] | None = None, - vertical_velocity_description: str | Iterable[str] | None = None, - longitudinal_velocity_description: str | Iterable[str] | None = None, - measurement_type: str | Iterable[str] | None = None, - last_modified: str | Iterable[str] | None = None, - channel_measurement_type: str | Iterable[str] | None = None, - properties: str | Iterable[str] | None = None, + monitoring_location_id: StringFilter = None, + field_visit_id: StringFilter = None, + measurement_number: StringFilter = None, + time: StringFilter = None, + channel_name: StringFilter = None, + channel_flow: StringFilter = None, + channel_flow_unit: StringFilter = None, + channel_width: StringFilter = None, + channel_width_unit: StringFilter = None, + channel_area: StringFilter = None, + channel_area_unit: StringFilter = None, + channel_velocity: StringFilter = None, + channel_velocity_unit: StringFilter = None, + channel_location_distance: StringFilter = None, + channel_location_distance_unit: StringFilter = None, + channel_stability: StringFilter = None, + channel_material: StringFilter = None, + channel_evenness: StringFilter = None, + horizontal_velocity_description: StringFilter = None, + vertical_velocity_description: StringFilter = None, + longitudinal_velocity_description: StringFilter = None, + measurement_type: StringFilter = None, + last_modified: StringFilter = None, + channel_measurement_type: StringFilter = None, + properties: StringList = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, diff --git a/dataretrieval/waterdata/types.py b/dataretrieval/waterdata/types.py index f5e1496b..b2477b29 100644 --- a/dataretrieval/waterdata/types.py +++ b/dataretrieval/waterdata/types.py @@ -1,4 +1,21 @@ -from typing import Literal +from collections.abc import Iterable +from typing import Literal, Optional, Union + +# Multi-value string filter: accepts a single string (single value), +# or any iterable of strings (list, tuple, ``pandas.Series``, +# ``numpy.ndarray``, generator). Iterables are materialized to a list +# internally; the OGC API receives the value(s) comma-joined in the URL +# — or, if the list is long enough to overflow the URL, the request +# switches to POST/CQL2. +StringFilter = Optional[Union[str, Iterable[str]]] + +# A list of string property/column names, comma-joined into the URL. +# Unlike ``StringFilter``, a single string passed here would be iterated +# as characters by ``",".join(...)`` and produce a malformed URL — so +# the type explicitly excludes ``str``. ``_get_args`` does wrap a stray +# single-string input into a one-element list at runtime as a +# convenience, but users are encouraged to pass a list. +StringList = Optional[Iterable[str]] CODE_SERVICES = Literal[ "characteristicgroup", diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 94acefd8..77b93f28 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -231,6 +231,8 @@ def _format_api_dates( converted from that offset to UTC; naive inputs are interpreted in the local time zone for backwards compatibility. """ + if datetime_input is None: + return None # Get timezone local_timezone = datetime.now().astimezone().tzinfo @@ -466,9 +468,12 @@ def _construct_api_requests( dates = service == "daily" and i != "last_modified" params[i] = _format_api_dates(params[i], date=dates) - # String together bbox elements from a list to a comma-separated string, - # and string together properties if provided - if bbox: + # Join bbox/properties into the comma-separated form the OGC API expects. + # For ``bbox`` use ``len() > 0`` so ``numpy.ndarray`` inputs don't trip + # the ambiguous truth-value error; for ``properties`` the truthy check is + # right because ``_get_args`` always materializes it to a list (and + # ``_switch_properties_id`` further upstream returns ``[]`` for None). + if bbox is not None and len(bbox) > 0: params["bbox"] = ",".join(map(str, bbox)) if properties: params["properties"] = ",".join(properties) @@ -1182,9 +1187,20 @@ def _check_profiles( # Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``. # Scalar non-string knobs and ``list[float]`` params are detected by runtime # type; only string-iterable-shaped params with special handling need to be -# named here: ``monitoring_location_id`` (validated separately) and the date- -# range params (which may contain ``pd.NaT``/None or interval strings). -_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"} +# named here: ``monitoring_location_id`` (validated separately), date-range +# params (which may contain ``pd.NaT``/None or interval strings), and bbox +# inputs (``list[float]``, sometimes a ``numpy.ndarray``). +_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | { + "monitoring_location_id", + "bbox", + "boundingBox", +} + +# Param names that must be a list of strings (never a single string). +# A single string passed in would iterate as characters in +# ``_construct_api_requests``'s ``",".join(...)`` step, producing a +# malformed URL. ``_get_args`` wraps single-string input into a list. +_LIST_ONLY_STR_PARAMS = frozenset({"properties"}) def _normalize_str_iterable( @@ -1321,7 +1337,11 @@ def _get_args( for k, v in local_vars.items(): if k in to_exclude or v is None: continue - if ( + if k in _LIST_ONLY_STR_PARAMS: + # Wrap a single string so the downstream `",".join(...)` doesn't + # iterate it as characters. + args[k] = [v] if isinstance(v, str) else _normalize_str_iterable(v, k) + elif ( k in _NO_NORMALIZE_PARAMS or isinstance(v, str) or not isinstance(v, Iterable) From 07ed123075d6993524458e472c3e1e4048494aa1 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 10:09:19 -0500 Subject: [PATCH 11/21] Revert StringFilter/StringList aliases; use inline PEP 604 unions Per code-review feedback: domain-specific type aliases hide what the annotations actually mean. PEP 604 unions are the pythonic idiom and show the caller exactly what's accepted at the call site. - `StringFilter` (172 sites) -> `str | Iterable[str] | None` - `StringList` (8 sites) -> `list[str] | None` Runtime behavior is unchanged: `_get_args` still routes filter params through `_normalize_str_iterable` (accepts str, list, tuple, Series, ndarray, generator) and `properties` through `_LIST_ONLY_STR_PARAMS` (wraps stray single-string input into a one-element list). --- dataretrieval/waterdata/api.py | 363 +++++++++++++++---------------- dataretrieval/waterdata/types.py | 19 +- 2 files changed, 182 insertions(+), 200 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index b42af094..3fe2ad64 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -8,6 +8,7 @@ import json import logging +from collections.abc import Iterable from io import StringIO from typing import get_args from urllib.parse import quote @@ -23,8 +24,6 @@ METADATA_COLLECTIONS, PROFILES, SERVICES, - StringFilter, - StringList, ) from dataretrieval.waterdata.utils import ( SAMPLES_URL, @@ -41,19 +40,19 @@ def get_daily( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - statistic_id: StringFilter = None, - properties: StringList = None, - time_series_id: StringFilter = None, - daily_id: StringFilter = None, - approval_status: StringFilter = None, - unit_of_measure: StringFilter = None, - qualifier: StringFilter = None, - value: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: list[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + daily_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, last_modified: str | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -244,18 +243,18 @@ def get_daily( def get_continuous( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - statistic_id: StringFilter = None, - properties: StringList = None, - time_series_id: StringFilter = None, - continuous_id: StringFilter = None, - approval_status: StringFilter = None, - unit_of_measure: StringFilter = None, - qualifier: StringFilter = None, - value: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: list[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + continuous_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, last_modified: str | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, limit: int | None = None, filter: str | None = None, filter_lang: FILTER_LANG | None = None, @@ -433,49 +432,49 @@ def get_continuous( def get_monitoring_locations( - monitoring_location_id: StringFilter = None, - agency_code: StringFilter = None, - agency_name: StringFilter = None, - monitoring_location_number: StringFilter = None, - monitoring_location_name: StringFilter = None, - district_code: StringFilter = None, - country_code: StringFilter = None, - country_name: StringFilter = None, - state_code: StringFilter = None, - state_name: StringFilter = None, - county_code: StringFilter = None, - county_name: StringFilter = None, - minor_civil_division_code: StringFilter = None, - site_type_code: StringFilter = None, - site_type: StringFilter = None, - hydrologic_unit_code: StringFilter = None, - basin_code: StringFilter = None, - altitude: StringFilter = None, - altitude_accuracy: StringFilter = None, - altitude_method_code: StringFilter = None, - altitude_method_name: StringFilter = None, - vertical_datum: StringFilter = None, - vertical_datum_name: StringFilter = None, - horizontal_positional_accuracy_code: StringFilter = None, - horizontal_positional_accuracy: StringFilter = None, - horizontal_position_method_code: StringFilter = None, - horizontal_position_method_name: StringFilter = None, - original_horizontal_datum: StringFilter = None, - original_horizontal_datum_name: StringFilter = None, - drainage_area: StringFilter = None, - contributing_drainage_area: StringFilter = None, - time_zone_abbreviation: StringFilter = None, - uses_daylight_savings: StringFilter = None, - construction_date: StringFilter = None, - aquifer_code: StringFilter = None, - national_aquifer_code: StringFilter = None, - aquifer_type_code: StringFilter = None, - well_constructed_depth: StringFilter = None, - hole_constructed_depth: StringFilter = None, - depth_source_code: StringFilter = None, - properties: StringList = None, + monitoring_location_id: str | Iterable[str] | None = None, + agency_code: str | Iterable[str] | None = None, + agency_name: str | Iterable[str] | None = None, + monitoring_location_number: str | Iterable[str] | None = None, + monitoring_location_name: str | Iterable[str] | None = None, + district_code: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + country_name: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + state_name: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, + county_name: str | Iterable[str] | None = None, + minor_civil_division_code: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type: str | Iterable[str] | None = None, + hydrologic_unit_code: str | Iterable[str] | None = None, + basin_code: str | Iterable[str] | None = None, + altitude: str | Iterable[str] | None = None, + altitude_accuracy: str | Iterable[str] | None = None, + altitude_method_code: str | Iterable[str] | None = None, + altitude_method_name: str | Iterable[str] | None = None, + vertical_datum: str | Iterable[str] | None = None, + vertical_datum_name: str | Iterable[str] | None = None, + horizontal_positional_accuracy_code: str | Iterable[str] | None = None, + horizontal_positional_accuracy: str | Iterable[str] | None = None, + horizontal_position_method_code: str | Iterable[str] | None = None, + horizontal_position_method_name: str | Iterable[str] | None = None, + original_horizontal_datum: str | Iterable[str] | None = None, + original_horizontal_datum_name: str | Iterable[str] | None = None, + drainage_area: str | Iterable[str] | None = None, + contributing_drainage_area: str | Iterable[str] | None = None, + time_zone_abbreviation: str | Iterable[str] | None = None, + uses_daylight_savings: str | Iterable[str] | None = None, + construction_date: str | Iterable[str] | None = None, + aquifer_code: str | Iterable[str] | None = None, + national_aquifer_code: str | Iterable[str] | None = None, + aquifer_type_code: str | Iterable[str] | None = None, + well_constructed_depth: str | Iterable[str] | None = None, + hole_constructed_depth: str | Iterable[str] | None = None, + depth_source_code: str | Iterable[str] | None = None, + properties: list[str] | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -732,29 +731,29 @@ def get_monitoring_locations( def get_time_series_metadata( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - parameter_name: StringFilter = None, - properties: StringList = None, - statistic_id: StringFilter = None, - hydrologic_unit_code: StringFilter = None, - state_name: StringFilter = None, - last_modified: StringFilter = None, - begin: StringFilter = None, - end: StringFilter = None, - begin_utc: StringFilter = None, - end_utc: StringFilter = None, - unit_of_measure: StringFilter = None, - computation_period_identifier: StringFilter = None, - computation_identifier: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + parameter_name: str | Iterable[str] | None = None, + properties: list[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + hydrologic_unit_code: str | Iterable[str] | None = None, + state_name: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + begin: str | Iterable[str] | None = None, + end: str | Iterable[str] | None = None, + begin_utc: str | Iterable[str] | None = None, + end_utc: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + computation_period_identifier: str | Iterable[str] | None = None, + computation_identifier: str | Iterable[str] | None = None, thresholds: int | None = None, - sublocation_identifier: StringFilter = None, - primary: StringFilter = None, - parent_time_series_id: StringFilter = None, - time_series_id: StringFilter = None, - web_description: StringFilter = None, + sublocation_identifier: str | Iterable[str] | None = None, + primary: str | Iterable[str] | None = None, + parent_time_series_id: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + web_description: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1188,19 +1187,19 @@ def get_combined_metadata( def get_latest_continuous( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - statistic_id: StringFilter = None, - properties: StringList = None, - time_series_id: StringFilter = None, - latest_continuous_id: StringFilter = None, - approval_status: StringFilter = None, - unit_of_measure: StringFilter = None, - qualifier: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: list[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + latest_continuous_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, value: int | None = None, - last_modified: StringFilter = None, + last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1384,19 +1383,19 @@ def get_latest_continuous( def get_latest_daily( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - statistic_id: StringFilter = None, - properties: StringList = None, - time_series_id: StringFilter = None, - latest_daily_id: StringFilter = None, - approval_status: StringFilter = None, - unit_of_measure: StringFilter = None, - qualifier: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + properties: list[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + latest_daily_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, value: int | None = None, - last_modified: StringFilter = None, + last_modified: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -1581,21 +1580,21 @@ def get_latest_daily( def get_field_measurements( - monitoring_location_id: StringFilter = None, - parameter_code: StringFilter = None, - observing_procedure_code: StringFilter = None, - properties: StringList = None, - field_visit_id: StringFilter = None, - approval_status: StringFilter = None, - unit_of_measure: StringFilter = None, - qualifier: StringFilter = None, - value: StringFilter = None, - last_modified: StringFilter = None, - observing_procedure: StringFilter = None, - vertical_datum: StringFilter = None, - measuring_agency: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + observing_procedure_code: str | Iterable[str] | None = None, + properties: list[str] | None = None, + field_visit_id: str | Iterable[str] | None = None, + approval_status: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + qualifier: str | Iterable[str] | None = None, + value: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + observing_procedure: str | Iterable[str] | None = None, + vertical_datum: str | Iterable[str] | None = None, + measuring_agency: str | Iterable[str] | None = None, skip_geometry: bool | None = None, - time: StringFilter = None, + time: str | Iterable[str] | None = None, bbox: list[float] | None = None, limit: int | None = None, filter: str | None = None, @@ -2120,28 +2119,28 @@ def get_samples( ssl_check: bool = True, service: SERVICES = "results", profile: PROFILES = "fullphyschem", - activityMediaName: StringFilter = None, + activityMediaName: str | Iterable[str] | None = None, activityStartDateLower: str | None = None, activityStartDateUpper: str | None = None, - activityTypeCode: StringFilter = None, - characteristicGroup: StringFilter = None, - characteristic: StringFilter = None, - characteristicUserSupplied: StringFilter = None, + activityTypeCode: str | Iterable[str] | None = None, + characteristicGroup: str | Iterable[str] | None = None, + characteristic: str | Iterable[str] | None = None, + characteristicUserSupplied: str | Iterable[str] | None = None, boundingBox: list[float] | None = None, - countryFips: StringFilter = None, - stateFips: StringFilter = None, - countyFips: StringFilter = None, - siteTypeCode: StringFilter = None, - siteTypeName: StringFilter = None, - usgsPCode: StringFilter = None, - hydrologicUnit: StringFilter = None, - monitoringLocationIdentifier: StringFilter = None, - organizationIdentifier: StringFilter = None, + countryFips: str | Iterable[str] | None = None, + stateFips: str | Iterable[str] | None = None, + countyFips: str | Iterable[str] | None = None, + siteTypeCode: str | Iterable[str] | None = None, + siteTypeName: str | Iterable[str] | None = None, + usgsPCode: str | Iterable[str] | None = None, + hydrologicUnit: str | Iterable[str] | None = None, + monitoringLocationIdentifier: str | Iterable[str] | None = None, + organizationIdentifier: str | Iterable[str] | None = None, pointLocationLatitude: float | None = None, pointLocationLongitude: float | None = None, pointLocationWithinMiles: float | None = None, - projectIdentifier: StringFilter = None, - recordIdentifierUserSupplied: StringFilter = None, + projectIdentifier: str | Iterable[str] | None = None, + recordIdentifierUserSupplied: str | Iterable[str] | None = None, ) -> tuple[pd.DataFrame, BaseMetadata]: """Search Samples database for USGS water quality data. This is a wrapper function for the Samples database API. All potential @@ -2419,18 +2418,18 @@ def get_samples_summary( def get_stats_por( approval_status: str | None = None, - computation_type: StringFilter = None, - country_code: StringFilter = None, - state_code: StringFilter = None, - county_code: StringFilter = None, + computation_type: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, - parent_time_series_id: StringFilter = None, - site_type_code: StringFilter = None, - site_type_name: StringFilter = None, - parameter_code: StringFilter = None, + parent_time_series_id: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type_name: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get day-of-year and month-of-year water data statistics from the @@ -2544,18 +2543,18 @@ def get_stats_por( def get_stats_date_range( approval_status: str | None = None, - computation_type: StringFilter = None, - country_code: StringFilter = None, - state_code: StringFilter = None, - county_code: StringFilter = None, + computation_type: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, start_date: str | None = None, end_date: str | None = None, - monitoring_location_id: StringFilter = None, + monitoring_location_id: str | Iterable[str] | None = None, page_size: int = 1000, - parent_time_series_id: StringFilter = None, - site_type_code: StringFilter = None, - site_type_name: StringFilter = None, - parameter_code: StringFilter = None, + parent_time_series_id: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type_name: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, expand_percentiles: bool = True, ) -> tuple[pd.DataFrame, BaseMetadata]: """Get monthly and annual water data statistics from the USGS Water Data API. @@ -2675,31 +2674,31 @@ def get_stats_date_range( def get_channel( - monitoring_location_id: StringFilter = None, - field_visit_id: StringFilter = None, - measurement_number: StringFilter = None, - time: StringFilter = None, - channel_name: StringFilter = None, - channel_flow: StringFilter = None, - channel_flow_unit: StringFilter = None, - channel_width: StringFilter = None, - channel_width_unit: StringFilter = None, - channel_area: StringFilter = None, - channel_area_unit: StringFilter = None, - channel_velocity: StringFilter = None, - channel_velocity_unit: StringFilter = None, - channel_location_distance: StringFilter = None, - channel_location_distance_unit: StringFilter = None, - channel_stability: StringFilter = None, - channel_material: StringFilter = None, - channel_evenness: StringFilter = None, - horizontal_velocity_description: StringFilter = None, - vertical_velocity_description: StringFilter = None, - longitudinal_velocity_description: StringFilter = None, - measurement_type: StringFilter = None, - last_modified: StringFilter = None, - channel_measurement_type: StringFilter = None, - properties: StringList = None, + monitoring_location_id: str | Iterable[str] | None = None, + field_visit_id: str | Iterable[str] | None = None, + measurement_number: str | Iterable[str] | None = None, + time: str | Iterable[str] | None = None, + channel_name: str | Iterable[str] | None = None, + channel_flow: str | Iterable[str] | None = None, + channel_flow_unit: str | Iterable[str] | None = None, + channel_width: str | Iterable[str] | None = None, + channel_width_unit: str | Iterable[str] | None = None, + channel_area: str | Iterable[str] | None = None, + channel_area_unit: str | Iterable[str] | None = None, + channel_velocity: str | Iterable[str] | None = None, + channel_velocity_unit: str | Iterable[str] | None = None, + channel_location_distance: str | Iterable[str] | None = None, + channel_location_distance_unit: str | Iterable[str] | None = None, + channel_stability: str | Iterable[str] | None = None, + channel_material: str | Iterable[str] | None = None, + channel_evenness: str | Iterable[str] | None = None, + horizontal_velocity_description: str | Iterable[str] | None = None, + vertical_velocity_description: str | Iterable[str] | None = None, + longitudinal_velocity_description: str | Iterable[str] | None = None, + measurement_type: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + channel_measurement_type: str | Iterable[str] | None = None, + properties: list[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, diff --git a/dataretrieval/waterdata/types.py b/dataretrieval/waterdata/types.py index b2477b29..f5e1496b 100644 --- a/dataretrieval/waterdata/types.py +++ b/dataretrieval/waterdata/types.py @@ -1,21 +1,4 @@ -from collections.abc import Iterable -from typing import Literal, Optional, Union - -# Multi-value string filter: accepts a single string (single value), -# or any iterable of strings (list, tuple, ``pandas.Series``, -# ``numpy.ndarray``, generator). Iterables are materialized to a list -# internally; the OGC API receives the value(s) comma-joined in the URL -# — or, if the list is long enough to overflow the URL, the request -# switches to POST/CQL2. -StringFilter = Optional[Union[str, Iterable[str]]] - -# A list of string property/column names, comma-joined into the URL. -# Unlike ``StringFilter``, a single string passed here would be iterated -# as characters by ``",".join(...)`` and produce a malformed URL — so -# the type explicitly excludes ``str``. ``_get_args`` does wrap a stray -# single-string input into a one-element list at runtime as a -# convenience, but users are encouraged to pass a list. -StringList = Optional[Iterable[str]] +from typing import Literal CODE_SERVICES = Literal[ "characteristicgroup", From 8dc70b22021ccdfc8a0f7afd79a95873c0c868c2 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 10:48:06 -0500 Subject: [PATCH 12/21] Reject list-of-non-strings at boundary instead of silently passing through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live stress test found that `parameter_code=[60, 65]` (ints) was silently passed to the OGC API, surfacing as a confusing JSONDecodeError when the server returned an error page. The "list-of-non-str pass-through" clause in `_get_args` was a defensive shortcut intended for `bbox`/`boundingBox` (which are `list[float]`), but those params are already covered by `_NO_NORMALIZE_PARAMS`, making the clause redundant AND bug-silencing. Now `_normalize_str_iterable` runs for every non-listed list-shaped param, raising TypeError with the offending element type — same path that already handles `monitoring_location_id=[..., 12345]`. --- dataretrieval/waterdata/utils.py | 1 - tests/waterdata_test.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 77b93f28..bb00e1f8 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1345,7 +1345,6 @@ def _get_args( k in _NO_NORMALIZE_PARAMS or isinstance(v, str) or not isinstance(v, Iterable) - or (isinstance(v, (list, tuple)) and v and not isinstance(v[0], str)) ): args[k] = v else: diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index fbf9c705..e2ba4da8 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -680,3 +680,17 @@ def test_get_daily_parameter_code_as_series(self): args_dict = fake.call_args[0][0] assert args_dict["parameter_code"] == ["00060", "00010"] assert isinstance(args_dict["parameter_code"], list) + + def test_list_of_ints_rejected_at_boundary(self): + """List-of-non-strings must be caught client-side, not silently sent. + + Regression: an earlier pass through ``_get_args`` had a + ``list-of-non-str`` fast-path that bypassed normalization, so + ``parameter_code=[60, 65]`` would reach the OGC API and surface as + a confusing JSONDecodeError on the malformed response. + """ + with pytest.raises(TypeError, match="parameter_code elements must be strings"): + get_daily( + monitoring_location_id="USGS-05427718", + parameter_code=[60, 65], + ) From 1d7a6e76246ee62a592f396b469116e3042b0c2d Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 11:14:10 -0500 Subject: [PATCH 13/21] Allow int-valued list filters in _get_args (water_year, year, month, day, peak_since) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `get_peaks` (merged in fca3d6c) introduced five `int | list[int] | None` filters. The prior commit removed `_get_args`'s "list-of-non-str pass-through" clause to catch user errors like `parameter_code=[60, 65]` client-side — but the same clause was the only thing letting `water_year=[2020]` through. CI surfaced this via `test_get_peaks_water_year_filter`. Add the five known int-list filter names to `_NO_NORMALIZE_PARAMS` so they bypass string-iterable normalization. Existing string-list params still validate, and `parameter_code=[60, 65]` still raises TypeError client-side as intended. If future int-list params are added, they must be opted in here — this is intentional: the cost of one line per new param is a fair price for not silently passing user errors to the API. --- dataretrieval/waterdata/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index bb00e1f8..d40b80c6 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1185,15 +1185,21 @@ def _check_profiles( # Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``. -# Scalar non-string knobs and ``list[float]`` params are detected by runtime -# type; only string-iterable-shaped params with special handling need to be -# named here: ``monitoring_location_id`` (validated separately), date-range -# params (which may contain ``pd.NaT``/None or interval strings), and bbox -# inputs (``list[float]``, sometimes a ``numpy.ndarray``). +# Scalar non-string knobs are detected by runtime type; only iterable-shaped +# params with special handling need to be named here: +# - ``monitoring_location_id`` is validated separately (AGENCY-ID format) +# - date-range params may contain ``pd.NaT``/None or interval strings +# - ``bbox``/``boundingBox`` are ``list[float]``, sometimes ``numpy.ndarray`` +# - ``get_peaks``'s int-valued filters (``water_year`` etc.) are ``list[int]`` _NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | { "monitoring_location_id", "bbox", "boundingBox", + "water_year", + "year", + "month", + "day", + "peak_since", } # Param names that must be a list of strings (never a single string). From aa98d23e833bdb302398926cf27a77cc7975600c Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 11:34:29 -0500 Subject: [PATCH 14/21] Close Copilot review gaps: extend AGENCY-ID check + iterable normalization to all OGC functions Four issues from Copilot's latest review: 1. `_check_monitoring_location_id` was missing from `get_combined_metadata`, `get_field_measurements_metadata`, and `get_peaks`. Bad mloc inputs to those callers reached the API. 2. `get_channel` built its `args` dict from a raw `locals()` comprehension instead of `_get_args`, so non-string iterables (`pd.Series`, `np.ndarray`, generators) were never materialized before request construction. 3. `get_combined_metadata`'s `thresholds: float | list[float]` filter was being routed through `_normalize_str_iterable`, which would reject `[1.0, 2.0]` as non-string. Added to `_NO_NORMALIZE_PARAMS`. 4. The `properties` annotation was `list[str] | None` in 8 functions, but `_get_args` wraps a single-string input into a list at runtime (via `_LIST_ONLY_STR_PARAMS`). Widened to `str | list[str] | None` so type checkers don't reject the supported `properties="time_series_id"` call shape. --- dataretrieval/waterdata/api.py | 26 ++++++++++++-------------- dataretrieval/waterdata/utils.py | 1 + 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 3fe2ad64..810c3678 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -43,7 +43,7 @@ def get_daily( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, time_series_id: str | Iterable[str] | None = None, daily_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -246,7 +246,7 @@ def get_continuous( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, time_series_id: str | Iterable[str] | None = None, continuous_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -472,7 +472,7 @@ def get_monitoring_locations( well_constructed_depth: str | Iterable[str] | None = None, hole_constructed_depth: str | Iterable[str] | None = None, depth_source_code: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, skip_geometry: bool | None = None, time: str | Iterable[str] | None = None, bbox: list[float] | None = None, @@ -734,7 +734,7 @@ def get_time_series_metadata( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, parameter_name: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, statistic_id: str | Iterable[str] | None = None, hydrologic_unit_code: str | Iterable[str] | None = None, state_name: str | Iterable[str] | None = None, @@ -1181,6 +1181,7 @@ def get_combined_metadata( service = "combined-metadata" output_id = "combined_meta_id" + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1190,7 +1191,7 @@ def get_latest_continuous( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, time_series_id: str | Iterable[str] | None = None, latest_continuous_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -1386,7 +1387,7 @@ def get_latest_daily( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, time_series_id: str | Iterable[str] | None = None, latest_daily_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -1583,7 +1584,7 @@ def get_field_measurements( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, observing_procedure_code: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, field_visit_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, unit_of_measure: str | Iterable[str] | None = None, @@ -1882,6 +1883,7 @@ def get_field_measurements_metadata( service = "field-measurements-metadata" output_id = "field_series_id" + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -2002,6 +2004,7 @@ def get_peaks( service = "peaks" output_id = "peak_id" + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -2698,7 +2701,7 @@ def get_channel( measurement_type: str | Iterable[str] | None = None, last_modified: str | Iterable[str] | None = None, channel_measurement_type: str | Iterable[str] | None = None, - properties: list[str] | None = None, + properties: str | list[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -2840,11 +2843,6 @@ def get_channel( service = "channel-measurements" output_id = "channel_measurements_id" - # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index d40b80c6..5de1534a 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1200,6 +1200,7 @@ def _check_profiles( "month", "day", "peak_since", + "thresholds", } # Param names that must be a list of strings (never a single string). From 42852a866472d0fcd38141a4e777a513dcb4fbe4 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 11:55:56 -0500 Subject: [PATCH 15/21] Centralize monitoring_location_id check in _get_args; trim narration /simplify findings: 1. Move the per-function `monitoring_location_id = _check_monitoring_location_id(...)` into `_get_args` itself. Eliminates 13 copy-paste call sites in api.py and closes the bug class Copilot found twice (four functions had been missed when added piecemeal). New `get_*` functions inherit validation automatically. 2. Drop `_LIST_ONLY_STR_PARAMS` (a one-element `frozenset`). Inline the `properties` special case with a literal `elif k == "properties":` plus a one-line WHY comment. 3. Compress the three-paragraph narration in `_construct_api_requests` to a single line explaining the only non-obvious bit (`len()` instead of truthiness, because of numpy ndarray). 4. Add `begin_utc`/`end_utc` to `_DATE_RANGE_PARAMS`. `get_time_series_metadata` exposes both as range filters but the constant was missing them, so a two-element list would have been treated as a multi-value POST/CQL2 filter instead of being formatted as an ISO-8601 interval. 5. Drop the now-unused `_check_monitoring_location_id` import from api.py. --- dataretrieval/waterdata/api.py | 14 -------------- dataretrieval/waterdata/utils.py | 23 ++++++++--------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 810c3678..59d64092 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -27,7 +27,6 @@ ) from dataretrieval.waterdata.utils import ( SAMPLES_URL, - _check_monitoring_location_id, _check_profiles, _default_headers, _get_args, @@ -232,7 +231,6 @@ def get_daily( ... last_modified="P7D", ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "daily" output_id = "daily_id" @@ -421,7 +419,6 @@ def get_continuous( ... filter_lang="cql-text", ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "continuous" output_id = "continuous_id" @@ -720,7 +717,6 @@ def get_monitoring_locations( ... properties=["monitoring_location_id", "state_name", "country_name"], ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "monitoring-locations" output_id = "monitoring_location_id" @@ -944,7 +940,6 @@ def get_time_series_metadata( ... begin="1990-01-01/..", ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "time-series-metadata" output_id = "time_series_id" @@ -1181,7 +1176,6 @@ def get_combined_metadata( service = "combined-metadata" output_id = "combined_meta_id" - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1373,7 +1367,6 @@ def get_latest_continuous( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "latest-continuous" output_id = "latest_continuous_id" @@ -1570,7 +1563,6 @@ def get_latest_daily( ... monitoring_location_id=["USGS-05114000", "USGS-09423350"] ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "latest-daily" output_id = "latest_daily_id" @@ -1761,7 +1753,6 @@ def get_field_measurements( ... time="P20Y", ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "field-measurements" output_id = "field_measurement_id" @@ -1883,7 +1874,6 @@ def get_field_measurements_metadata( service = "field-measurements-metadata" output_id = "field_series_id" - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -2004,7 +1994,6 @@ def get_peaks( service = "peaks" output_id = "peak_id" - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -2536,7 +2525,6 @@ def get_stats_por( ... ) """ # Build argument dictionary, omitting None values - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2666,7 +2654,6 @@ def get_stats_date_range( ... ) """ # Build argument dictionary, omitting None values - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( @@ -2839,7 +2826,6 @@ def get_channel( ... monitoring_location_id="USGS-02238500", ... ) """ - monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) service = "channel-measurements" output_id = "channel_measurements_id" diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 5de1534a..5f5404c5 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -149,7 +149,9 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s # string list. Used by ``_construct_api_requests`` to keep them out of the # POST/CQL2 multi-value path and to route them through ``_format_api_dates``, # and by ``_NO_NORMALIZE_PARAMS`` to bypass string-iterable normalization. -_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"}) +_DATE_RANGE_PARAMS = frozenset( + {"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"} +) def _parse_datetime(value: str) -> datetime | None: @@ -468,11 +470,7 @@ def _construct_api_requests( dates = service == "daily" and i != "last_modified" params[i] = _format_api_dates(params[i], date=dates) - # Join bbox/properties into the comma-separated form the OGC API expects. - # For ``bbox`` use ``len() > 0`` so ``numpy.ndarray`` inputs don't trip - # the ambiguous truth-value error; for ``properties`` the truthy check is - # right because ``_get_args`` always materializes it to a list (and - # ``_switch_properties_id`` further upstream returns ``[]`` for None). + # `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`. if bbox is not None and len(bbox) > 0: params["bbox"] = ",".join(map(str, bbox)) if properties: @@ -1203,12 +1201,6 @@ def _check_profiles( "thresholds", } -# Param names that must be a list of strings (never a single string). -# A single string passed in would iterate as characters in -# ``_construct_api_requests``'s ``",".join(...)`` step, producing a -# malformed URL. ``_get_args`` wraps single-string input into a list. -_LIST_ONLY_STR_PARAMS = frozenset({"properties"}) - def _normalize_str_iterable( value: str | Iterable[str] | None, @@ -1344,9 +1336,10 @@ def _get_args( for k, v in local_vars.items(): if k in to_exclude or v is None: continue - if k in _LIST_ONLY_STR_PARAMS: - # Wrap a single string so the downstream `",".join(...)` doesn't - # iterate it as characters. + if k == "monitoring_location_id": + args[k] = _check_monitoring_location_id(v) + elif k == "properties": + # `",".join(properties)` would iterate a bare string as characters. args[k] = [v] if isinstance(v, str) else _normalize_str_iterable(v, k) elif ( k in _NO_NORMALIZE_PARAMS From 702ea298ac8ca4d7cf51804fae980866c5c9c1bd Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 11:57:36 -0500 Subject: [PATCH 16/21] Widen string-filter annotations on get_combined_metadata / get_field_measurements_metadata / get_peaks These three metadata functions were added to main while PR 229 was in flight, so their string-filter parameters used the narrower `str | list[str] | None`. PR 229's centralized `_get_args` materializes any non-string Iterable (pd.Series, np.ndarray, generators) before sending the request, so the runtime accepts more than `list[str]`. Bring the type annotations in line with the other 11 OGC getters (`str | Iterable[str] | None`). `properties` is intentionally left as `str | list[str] | None` because the join-by-comma site downstream of `_get_args` only handles `list` correctly after the single-string wrap; mirroring the other getters here. --- dataretrieval/waterdata/api.py | 134 ++++++++++++++++----------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 59d64092..297b195e 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -950,61 +950,61 @@ def get_time_series_metadata( def get_combined_metadata( - monitoring_location_id: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, - parameter_name: str | list[str] | None = None, - parameter_description: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - statistic_id: str | list[str] | None = None, - data_type: str | list[str] | None = None, - computation_identifier: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + parameter_name: str | Iterable[str] | None = None, + parameter_description: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + statistic_id: str | Iterable[str] | None = None, + data_type: str | Iterable[str] | None = None, + computation_identifier: str | Iterable[str] | None = None, thresholds: float | list[float] | None = None, - sublocation_identifier: str | list[str] | None = None, - primary: str | list[str] | None = None, - parent_time_series_id: str | list[str] | None = None, - web_description: str | list[str] | None = None, - last_modified: str | list[str] | None = None, - begin: str | list[str] | None = None, - end: str | list[str] | None = None, - agency_code: str | list[str] | None = None, - agency_name: str | list[str] | None = None, - monitoring_location_number: str | list[str] | None = None, - monitoring_location_name: str | list[str] | None = None, - district_code: str | list[str] | None = None, - country_code: str | list[str] | None = None, - country_name: str | list[str] | None = None, - state_code: str | list[str] | None = None, - state_name: str | list[str] | None = None, - county_code: str | list[str] | None = None, - county_name: str | list[str] | None = None, - minor_civil_division_code: str | list[str] | None = None, - site_type_code: str | list[str] | None = None, - site_type: str | list[str] | None = None, - hydrologic_unit_code: str | list[str] | None = None, - basin_code: str | list[str] | None = None, - altitude: str | list[str] | None = None, - altitude_accuracy: str | list[str] | None = None, - altitude_method_code: str | list[str] | None = None, - altitude_method_name: str | list[str] | None = None, - vertical_datum: str | list[str] | None = None, - vertical_datum_name: str | list[str] | None = None, - horizontal_positional_accuracy_code: str | list[str] | None = None, - horizontal_positional_accuracy: str | list[str] | None = None, - horizontal_position_method_code: str | list[str] | None = None, - horizontal_position_method_name: str | list[str] | None = None, - original_horizontal_datum: str | list[str] | None = None, - original_horizontal_datum_name: str | list[str] | None = None, - drainage_area: str | list[str] | None = None, - contributing_drainage_area: str | list[str] | None = None, - time_zone_abbreviation: str | list[str] | None = None, - uses_daylight_savings: str | list[str] | None = None, - construction_date: str | list[str] | None = None, - aquifer_code: str | list[str] | None = None, - national_aquifer_code: str | list[str] | None = None, - aquifer_type_code: str | list[str] | None = None, - well_constructed_depth: str | list[str] | None = None, - hole_constructed_depth: str | list[str] | None = None, - depth_source_code: str | list[str] | None = None, + sublocation_identifier: str | Iterable[str] | None = None, + primary: str | Iterable[str] | None = None, + parent_time_series_id: str | Iterable[str] | None = None, + web_description: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, + begin: str | Iterable[str] | None = None, + end: str | Iterable[str] | None = None, + agency_code: str | Iterable[str] | None = None, + agency_name: str | Iterable[str] | None = None, + monitoring_location_number: str | Iterable[str] | None = None, + monitoring_location_name: str | Iterable[str] | None = None, + district_code: str | Iterable[str] | None = None, + country_code: str | Iterable[str] | None = None, + country_name: str | Iterable[str] | None = None, + state_code: str | Iterable[str] | None = None, + state_name: str | Iterable[str] | None = None, + county_code: str | Iterable[str] | None = None, + county_name: str | Iterable[str] | None = None, + minor_civil_division_code: str | Iterable[str] | None = None, + site_type_code: str | Iterable[str] | None = None, + site_type: str | Iterable[str] | None = None, + hydrologic_unit_code: str | Iterable[str] | None = None, + basin_code: str | Iterable[str] | None = None, + altitude: str | Iterable[str] | None = None, + altitude_accuracy: str | Iterable[str] | None = None, + altitude_method_code: str | Iterable[str] | None = None, + altitude_method_name: str | Iterable[str] | None = None, + vertical_datum: str | Iterable[str] | None = None, + vertical_datum_name: str | Iterable[str] | None = None, + horizontal_positional_accuracy_code: str | Iterable[str] | None = None, + horizontal_positional_accuracy: str | Iterable[str] | None = None, + horizontal_position_method_code: str | Iterable[str] | None = None, + horizontal_position_method_name: str | Iterable[str] | None = None, + original_horizontal_datum: str | Iterable[str] | None = None, + original_horizontal_datum_name: str | Iterable[str] | None = None, + drainage_area: str | Iterable[str] | None = None, + contributing_drainage_area: str | Iterable[str] | None = None, + time_zone_abbreviation: str | Iterable[str] | None = None, + uses_daylight_savings: str | Iterable[str] | None = None, + construction_date: str | Iterable[str] | None = None, + aquifer_code: str | Iterable[str] | None = None, + national_aquifer_code: str | Iterable[str] | None = None, + aquifer_type_code: str | Iterable[str] | None = None, + well_constructed_depth: str | Iterable[str] | None = None, + hole_constructed_depth: str | Iterable[str] | None = None, + depth_source_code: str | Iterable[str] | None = None, properties: str | list[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, @@ -1763,13 +1763,13 @@ def get_field_measurements( def get_field_measurements_metadata( - monitoring_location_id: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, - parameter_name: str | list[str] | None = None, - parameter_description: str | list[str] | None = None, - begin: str | list[str] | None = None, - end: str | list[str] | None = None, - last_modified: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + parameter_name: str | Iterable[str] | None = None, + parameter_description: str | Iterable[str] | None = None, + begin: str | Iterable[str] | None = None, + end: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, properties: str | list[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, @@ -1880,12 +1880,12 @@ def get_field_measurements_metadata( def get_peaks( - monitoring_location_id: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, - time_series_id: str | list[str] | None = None, - unit_of_measure: str | list[str] | None = None, - time: str | list[str] | None = None, - last_modified: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, + time_series_id: str | Iterable[str] | None = None, + unit_of_measure: str | Iterable[str] | None = None, + time: str | Iterable[str] | None = None, + last_modified: str | Iterable[str] | None = None, water_year: int | list[int] | None = None, year: int | list[int] | None = None, month: int | list[int] | None = None, From 5260a8f583daf0a31b98c79d90c2a8e9057e4699 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 13:16:30 -0500 Subject: [PATCH 17/21] Fix CI and reject Mapping inputs to _format_api_dates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues: 1. CI failure on `test_get_args_basic` / `test_get_args_with_exclude`. The pre-existing tests used `monitoring_location_id="123"` purely as a placeholder to exercise `_get_args`'s filter/exclude logic. After the previous commit centralized the AGENCY-ID format check into `_get_args`, the bare "123" now raises ValueError before the dict-shaping assertions are reached. Update the placeholder to a well-formed "USGS-123" so the test's actual intent — filter + exclude wiring — still runs. 2. Copilot review: `_format_api_dates` silently accepts a Mapping by materializing its keys (`time={"2024-01-01": "x"}` → `["2024-01-01"]`), the same footgun `_normalize_str_iterable` already rejects elsewhere. Raise TypeError before the `list(...)` call. Adds a unit test. --- dataretrieval/waterdata/utils.py | 6 ++++++ tests/waterdata_utils_test.py | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 5f5404c5..17fd370b 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -241,6 +241,12 @@ def _format_api_dates( # Convert single string to list for uniform processing if isinstance(datetime_input, str): datetime_input = [datetime_input] + elif isinstance(datetime_input, Mapping): + # `list(mapping)` returns keys, which silently accepts the wrong shape. + raise TypeError( + f"date input must be a string or sequence of strings, " + f"not {type(datetime_input).__name__}." + ) elif not isinstance(datetime_input, (list, tuple)): # Materialize any other iterable (pandas.Series, numpy.ndarray, # generator, ...) so the len()/subscript operations below work. diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py index f472000e..d602598b 100644 --- a/tests/waterdata_utils_test.py +++ b/tests/waterdata_utils_test.py @@ -15,26 +15,26 @@ def test_get_args_basic(): local_vars = { - "monitoring_location_id": "123", + "monitoring_location_id": "USGS-123", "service": "daily", "output_id": "daily_id", "none_val": None, "other": "val", } result = _get_args(local_vars) - assert result == {"monitoring_location_id": "123", "other": "val"} + assert result == {"monitoring_location_id": "USGS-123", "other": "val"} def test_get_args_with_exclude(): local_vars = { - "monitoring_location_id": "123", + "monitoring_location_id": "USGS-123", "service": "daily", "output_id": "daily_id", "to_exclude": "secret", "other": "val", } result = _get_args(local_vars, exclude={"to_exclude"}) - assert result == {"monitoring_location_id": "123", "other": "val"} + assert result == {"monitoring_location_id": "USGS-123", "other": "val"} def test_get_args_empty(): @@ -224,6 +224,16 @@ def test_format_api_dates_open_ended_range_with_none(): assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01" +def test_format_api_dates_rejects_mapping(): + """`time={"2024-01-01": "x"}` would silently materialize as the keys list, + accepting input the user clearly didn't intend. + """ + import pytest + + with pytest.raises(TypeError, match="date input must be a string or sequence"): + _format_api_dates({"2024-01-01": "ignored"}) + + def _make_response(status, body, reason=None, content_type="text/html"): resp = requests.Response() resp.status_code = status From 463912a34b2dfdb229b81e80d118b5d8ad6be815 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 14:49:11 -0500 Subject: [PATCH 18/21] Update parameter docstrings to "string or iterable of strings" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot noted the public docstrings still described multi-value string filters as "string or list of strings" while the annotations were widened to `str | Iterable[str] | None`. The user-facing docs now lie about the contract — pd.Series / np.ndarray / generators / tuples all work at runtime but read as unsupported. Replaced 165 occurrences across api.py. The 11 `properties` parameter docstrings remain "string or list of strings" because that parameter is intentionally typed `str | list[str] | None` (the comma-join site requires a list after the single-string wrap). --- dataretrieval/waterdata/api.py | 330 ++++++++++++++++----------------- 1 file changed, 165 insertions(+), 165 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 297b195e..17a25eb5 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -73,19 +73,19 @@ def get_daily( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at @@ -95,10 +95,10 @@ def get_daily( Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - time_series_id : string or list of strings, optional + time_series_id : string or iterable of strings, optional A unique identifier representing a single time series. This corresponds to the id field in the time-series-metadata endpoint. - daily_id : string or list of strings, optional + daily_id : string or iterable of strings, optional A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does @@ -106,7 +106,7 @@ def get_daily( uniquely identify a single observation over time, compare the time and time_series_id fields; each time series will only have a single observation at a given time. - approval_status : string or list of strings, optional + approval_status : string or iterable of strings, optional Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional @@ -117,14 +117,14 @@ def get_daily( approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to: https://waterdata.usgs.gov/provisional-data-statement/. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - qualifier : string or list of strings, optional + qualifier : string or iterable of strings, optional This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated. - value : string or list of strings, optional + value : string or iterable of strings, optional The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision. last_modified : string, optional @@ -278,19 +278,19 @@ def get_continuous( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents. Continuous data are nearly always associated with statistic id 00011. Using a different code (such as 00003 for mean) will @@ -302,10 +302,10 @@ def get_continuous( Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - time_series_id : string or list of strings, optional + time_series_id : string or iterable of strings, optional A unique identifier representing a single time series. This corresponds to the id field in the time-series-metadata endpoint. - continuous_id : string or list of strings, optional + continuous_id : string or iterable of strings, optional A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does @@ -313,7 +313,7 @@ def get_continuous( uniquely identify a single observation over time, compare the time and time_series_id fields; each time series will only have a single observation at a given time. - approval_status : string or list of strings, optional + approval_status : string or iterable of strings, optional Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional @@ -324,14 +324,14 @@ def get_continuous( approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to: https://waterdata.usgs.gov/provisional-data-statement/. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - qualifier : string or list of strings, optional + qualifier : string or iterable of strings, optional This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated. - value : string or list of strings, optional + value : string or iterable of strings, optional The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision. last_modified : string, optional @@ -487,27 +487,27 @@ def get_monitoring_locations( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - agency_code : string or list of strings, optional + agency_code : string or iterable of strings, optional The agency that is reporting the data. Agency codes are fixed values assigned by the National Water Information System (NWIS). - agency_name : string or list of strings, optional + agency_name : string or iterable of strings, optional The name of the agency that is reporting the data. - monitoring_location_number : string or list of strings, optional + monitoring_location_number : string or iterable of strings, optional Each monitoring location in the USGS data base has a unique 8- to 15-digit identification number. Monitoring location numbers are assigned based on this logic: https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning. - monitoring_location_name : string or list of strings, optional + monitoring_location_name : string or iterable of strings, optional This is the official name of the monitoring location in the database. For well information this can be a district-assigned local number. - district_code : string or list of strings, optional + district_code : string or iterable of strings, optional The Water Science Centers (WSCs) across the United States use the FIPS state code as the district code. In some case, monitoring locations and samples may be managed by a water science center that is adjacent to the @@ -515,11 +515,11 @@ def get_monitoring_locations( monitoring location may have a district code of 30 which translates to Montana, but the state code could be 56 for Wyoming because that is where the monitoring location actually is located. - country_code : string or list of strings, optional + country_code : string or iterable of strings, optional The code for the country in which the monitoring location is located. - country_name : string or list of strings, optional + country_name : string or iterable of strings, optional The name of the country in which the monitoring location is located. - state_code : string or list of strings, optional + state_code : string or iterable of strings, optional State code. A two-digit ANSI code (formerly FIPS code) as defined by the American National Standards Institute, to define States and equivalents. A three-digit ANSI code is used to define counties and @@ -529,26 +529,26 @@ def get_monitoring_locations( political subdivisions other than the US are Mexico and Canada. The Mexican states have US state codes ranging from 81-86 and Canadian provinces have state codes ranging from 90-98. - state_name : string or list of strings, optional + state_name : string or iterable of strings, optional The name of the state or state equivalent in which the monitoring location is located. - county_code : string or list of strings, optional + county_code : string or iterable of strings, optional The code for the county or county equivalent (parish, borough, etc.) in which the monitoring location is located. A `list of codes `_ is available. - county_name : string or list of strings, optional + county_name : string or iterable of strings, optional The name of the county or county equivalent (parish, borough, etc.) in which the monitoring location is located. A `list of codes `_ is available. - minor_civil_division_code : string or list of strings, optional + minor_civil_division_code : string or iterable of strings, optional Codes for primary governmental or administrative divisions of the county or county equivalent in which the monitoring location is located. - site_type_code : string or list of strings, optional + site_type_code : string or iterable of strings, optional A code describing the hydrologic setting of the monitoring location. Example: "US:15:001" (United States: Hawaii, Hawaii County) - site_type : string or list of strings, optional + site_type : string or iterable of strings, optional A description of the hydrologic setting of the monitoring location. - hydrologic_unit_code : string or list of strings, optional + hydrologic_unit_code : string or iterable of strings, optional The United States is divided and sub-divided into successively smaller hydrologic units which are classified into four levels: regions, sub-regions, accounting units, and cataloging units. The hydrologic @@ -557,20 +557,20 @@ def get_monitoring_locations( unique hydrologic unit code (HUC) consisting of two to eight digits based on the four levels of classification in the hydrologic unit system. - basin_code : string or list of strings, optional + basin_code : string or iterable of strings, optional The Basin Code or "drainage basin code" is a two-digit code that further subdivides the 8-digit hydrologic-unit code. The drainage basin code is defined by the USGS State Office where the monitoring location is located. - altitude : string or list of strings, optional + altitude : string or iterable of strings, optional Altitude of the monitoring location referenced to the specified Vertical Datum. - altitude_accuracy : string or list of strings, optional + altitude_accuracy : string or iterable of strings, optional Accuracy of the altitude, in feet. An accuracy of +/- 0.1 foot would be entered as “.1”. Many altitudes are interpolated from the contours on topographic maps; accuracies determined in this way are generally entered as one-half of the contour interval. - altitude_method_code : string or list of strings, optional + altitude_method_code : string or iterable of strings, optional Codes representing the method used to measure altitude. altitude_method_name : float, optional The name of the the method used to measure altitude. @@ -580,27 +580,27 @@ def get_monitoring_locations( vertical_datum_name : float, optional The datum used to determine altitude and vertical position at the monitoring location. - horizontal_positional_accuracy_code : string or list of strings, optional + horizontal_positional_accuracy_code : string or iterable of strings, optional Indicates the accuracy of the latitude longitude values. - horizontal_positional_accuracy : string or list of strings, optional + horizontal_positional_accuracy : string or iterable of strings, optional Indicates the accuracy of the latitude longitude values. - horizontal_position_method_code : string or list of strings, optional + horizontal_position_method_code : string or iterable of strings, optional Indicates the method used to determine latitude longitude values. - horizontal_position_method_name : string or list of strings, optional + horizontal_position_method_name : string or iterable of strings, optional Indicates the method used to determine latitude longitude values. - original_horizontal_datum : string or list of strings, optional + original_horizontal_datum : string or iterable of strings, optional Coordinates are published in EPSG:4326 / WGS84 / World Geodetic System 1984. This field indicates the original datum used to determine coordinates before they were converted. - original_horizontal_datum_name : string or list of strings, optional + original_horizontal_datum_name : string or iterable of strings, optional Coordinates are published in EPSG:4326 / WGS84 / World Geodetic System 1984. This field indicates the original datum used to determine coordinates before they were converted. - drainage_area : string or list of strings, optional + drainage_area : string or iterable of strings, optional The area enclosed by a topographic divide from which direct surface runoff from precipitation normally drains by gravity into the stream above that point. - contributing_drainage_area : string or list of strings, optional + contributing_drainage_area : string or iterable of strings, optional The contributing drainage area of a lake, stream, wetland, or estuary monitoring location, in square miles. This item should be present only if the contributing area is different from the total drainage area. This @@ -609,19 +609,19 @@ def get_monitoring_locations( groundwater or traps the water in ponds so that rainfall does not contribute to runoff. A transbasin diversion can also affect the total drainage area. - time_zone_abbreviation : string or list of strings, optional + time_zone_abbreviation : string or iterable of strings, optional A short code describing the time zone used by a monitoring location. - uses_daylight_savings : string or list of strings, optional + uses_daylight_savings : string or iterable of strings, optional A flag indicating whether or not a monitoring location uses daylight savings. - construction_date : string or list of strings, optional + construction_date : string or iterable of strings, optional Date the well was completed. - aquifer_code : string or list of strings, optional + aquifer_code : string or iterable of strings, optional Local aquifers in the USGS water resources data base are identified by a geohydrologic unit code (a three-digit number related to the age of the formation, followed by a 4 or 5 character abbreviation for the geologic unit or aquifer name). Additional information is available `at this link `_. - national_aquifer_code : string or list of strings, optional + national_aquifer_code : string or iterable of strings, optional National aquifers are the principal aquifers or aquifer systems in the United States, defined as regionally extensive aquifers or aquifer systems that have the potential to be used as a source of potable water. Not all groundwater @@ -629,7 +629,7 @@ def get_monitoring_locations( monitoring locations will not be retrieved using this search criteria. A `list of National aquifer codes and names `_ is available. - aquifer_type_code : string or list of strings, optional + aquifer_type_code : string or iterable of strings, optional Groundwater occurs in aquifers under two different conditions. Where water only partly fills an aquifer, the upper surface is free to rise and decline. These aquifers are referred to as unconfined (or water-table) aquifers. Where @@ -638,15 +638,15 @@ def get_monitoring_locations( aquifer is penetrated by a well, the water level in the well will rise above the top of the aquifer (but not necessarily above land surface). Additional information is available `at this link `_. - well_constructed_depth : string or list of strings, optional + well_constructed_depth : string or iterable of strings, optional The depth of the finished well, in feet below land surface datum. Note: Not all groundwater monitoring locations have information on Well Depth. Such monitoring locations will not be retrieved using this search criteria. - hole_constructed_depth : string or list of strings, optional + hole_constructed_depth : string or iterable of strings, optional The total depth to which the hole is drilled, in feet below land surface datum. Note: Not all groundwater monitoring locations have information on Hole Depth. Such monitoring locations will not be retrieved using this search criteria. - depth_source_code : string or list of strings, optional + depth_source_code : string or iterable of strings, optional A code indicating the source of water-level data. A `list of codes `_ is available. @@ -765,31 +765,31 @@ def get_time_series_metadata( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - parameter_name : string or list of strings, optional + parameter_name : string or iterable of strings, optional A human-understandable name corresponding to parameter_code. properties : string or list of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - hydrologic_unit_code : string or list of strings, optional + hydrologic_unit_code : string or iterable of strings, optional The United States is divided and sub-divided into successively smaller hydrologic units which are classified into four levels: regions, sub-regions, accounting units, and cataloging units. The hydrologic @@ -797,7 +797,7 @@ def get_time_series_metadata( to the largest (regions). Each hydrologic unit is identified by a unique hydrologic unit code (HUC) consisting of two to eight digits based on the four levels of classification in the hydrologic unit system. - state_name : string or list of strings, optional + state_name : string or iterable of strings, optional The name of the state or state equivalent in which the monitoring location is located. last_modified : string, optional @@ -817,15 +817,15 @@ def get_time_series_metadata( * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours - begin : string or list of strings, optional + begin : string or iterable of strings, optional This field contains the same information as "begin_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs. - end : string or list of strings, optional + end : string or iterable of strings, optional This field contains the same information as "end_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs. - begin_utc : string or list of strings, optional + begin_utc : string or iterable of strings, optional The datetime of the earliest observation in the time series. Together with end, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection @@ -844,7 +844,7 @@ def get_time_series_metadata( * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours - end_utc : string or list of strings, optional + end_utc : string or iterable of strings, optional The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series @@ -866,12 +866,12 @@ def get_time_series_metadata( * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - computation_period_identifier : string or list of strings, optional + computation_period_identifier : string or iterable of strings, optional Indicates the period of data used for any statistical computations. - computation_identifier : string or list of strings, optional + computation_identifier : string or iterable of strings, optional Indicates whether the data from this time series represent a specific statistical computation. thresholds : numeric or list of numbers, optional @@ -880,13 +880,13 @@ def get_time_series_metadata( sensor is non-operative. These thresholds are sometimes used to automatically determine if an observation is erroneous due to sensor error, and therefore shouldn't be included in the time series. - sublocation_identifier : string or list of strings, optional - primary : string or list of strings, optional - parent_time_series_id : string or list of strings, optional - time_series_id : string or list of strings, optional + sublocation_identifier : string or iterable of strings, optional + primary : string or iterable of strings, optional + parent_time_series_id : string or iterable of strings, optional + time_series_id : string or iterable of strings, optional A unique identifier representing a single time series. This corresponds to the id field in the time-series-metadata endpoint. - web_description : string or list of strings, optional + web_description : string or iterable of strings, optional A description of what this time series represents, as used by WDFN and other USGS data dissemination products. skip_geometry : boolean, optional @@ -1037,45 +1037,45 @@ def get_combined_metadata( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. Created by combining the agency code (e.g. ``USGS``) with the ID number (e.g. ``02238500``), separated by a hyphen (e.g. ``"USGS-02238500"``). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional 5-digit codes used to identify the constituent measured and the units of measure. See https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - parameter_name : string or list of strings, optional + parameter_name : string or iterable of strings, optional A human-understandable name corresponding to ``parameter_code``. - parameter_description : string or list of strings, optional + parameter_description : string or iterable of strings, optional A human-readable description of what is being measured. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents (e.g. ``00001`` max, ``00002`` min, ``00003`` mean). Full list at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - data_type : string or list of strings, optional + data_type : string or iterable of strings, optional The type of data the time series represents, e.g. ``"Continuous values"``, ``"Daily values"``, ``"Field measurements"``. - computation_identifier : string or list of strings, optional + computation_identifier : string or iterable of strings, optional Indicates whether the data from this time series represent a specific statistical computation. thresholds : numeric or list of numbers, optional Numeric limits known for a time series (e.g. historic maximum, below-which-the-sensor-is-non-operative). - sublocation_identifier : string or list of strings, optional - primary : string or list of strings, optional + sublocation_identifier : string or iterable of strings, optional + primary : string or iterable of strings, optional A flag identifying whether the time series is "primary". Primary time series are standard observations that have undergone Bureau review and approval. Non-primary (provisional) time series have a missing ``primary`` value, are produced for timely best-science use, and are retained by this system for only 120 days. - parent_time_series_id : string or list of strings, optional - web_description : string or list of strings, optional + parent_time_series_id : string or iterable of strings, optional + web_description : string or iterable of strings, optional A description of what this time series represents, as used by WDFN and other USGS data dissemination products. last_modified, begin, end : string, optional @@ -1084,7 +1084,7 @@ def get_combined_metadata( or an ISO 8601 duration (e.g. ``"P1M"``, ``"PT36H"``). See :func:`get_time_series_metadata` for the full grammar. state_name, county_name, hydrologic_unit_code, site_type, \ -site_type_code : string or list of strings, optional +site_type_code : string or iterable of strings, optional Common location-catalog filters carried over from the ``monitoring-locations`` collection. The function also accepts the full list of location-catalog kwargs (agency, district, @@ -1214,19 +1214,19 @@ def get_latest_continuous( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at @@ -1236,10 +1236,10 @@ def get_latest_continuous( options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - time_series_id : string or list of strings, optional + time_series_id : string or iterable of strings, optional A unique identifier representing a single time series. This corresponds to the id field in the time-series-metadata endpoint. - latest_continuous_id : string or list of strings, optional + latest_continuous_id : string or iterable of strings, optional A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does @@ -1247,7 +1247,7 @@ def get_latest_continuous( uniquely identify a single observation over time, compare the time and time_series_id fields; each time series will only have a single observation at a given time. - approval_status : string or list of strings, optional + approval_status : string or iterable of strings, optional Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional @@ -1258,14 +1258,14 @@ def get_latest_continuous( approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to: https://waterdata.usgs.gov/provisional-data-statement/. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - qualifier : string or list of strings, optional + qualifier : string or iterable of strings, optional This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated. - value : string or list of strings, optional + value : string or iterable of strings, optional The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision. last_modified : string, optional @@ -1411,19 +1411,19 @@ def get_latest_daily( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - statistic_id : string or list of strings, optional + statistic_id : string or iterable of strings, optional A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at @@ -1433,10 +1433,10 @@ def get_latest_daily( options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - time_series_id : string or list of strings, optional + time_series_id : string or iterable of strings, optional A unique identifier representing a single time series. This corresponds to the id field in the time-series-metadata endpoint. - latest_daily_id : string or list of strings, optional + latest_daily_id : string or iterable of strings, optional A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does @@ -1444,7 +1444,7 @@ def get_latest_daily( uniquely identify a single observation over time, compare the time and time_series_id fields; each time series will only have a single observation at a given time. - approval_status : string or list of strings, optional + approval_status : string or iterable of strings, optional Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional @@ -1455,14 +1455,14 @@ def get_latest_daily( approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to: https://waterdata.usgs.gov/provisional-data-statement/. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - qualifier : string or list of strings, optional + qualifier : string or iterable of strings, optional This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated. - value : string or list of strings, optional + value : string or iterable of strings, optional The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision. last_modified : string, optional @@ -1603,19 +1603,19 @@ def get_field_measurements( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - observing_procedure_code : string or list of strings, optional + observing_procedure_code : string or iterable of strings, optional A short code corresponding to the observing procedure for the field measurement. properties : string or list of strings, optional @@ -1623,10 +1623,10 @@ def get_field_measurements( options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified - field_visit_id : string or list of strings, optional + field_visit_id : string or iterable of strings, optional A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit. - approval_status : string or list of strings, optional + approval_status : string or iterable of strings, optional Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional @@ -1637,14 +1637,14 @@ def get_field_measurements( approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to: https://waterdata.usgs.gov/provisional-data-statement/. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional A human-readable description of the units of measurement associated with an observation. - qualifier : string or list of strings, optional + qualifier : string or iterable of strings, optional This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated. - value : string or list of strings, optional + value : string or iterable of strings, optional The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision. last_modified : string, optional @@ -1664,12 +1664,12 @@ def get_field_measurements( * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours - observing_procedure : string or list of strings, optional + observing_procedure : string or iterable of strings, optional Water measurement or water-quality observing procedure descriptions. - vertical_datum : string or list of strings, optional + vertical_datum : string or iterable of strings, optional The datum used to determine altitude and vertical position at the monitoring location. - measuring_agency : string or list of strings, optional + measuring_agency : string or iterable of strings, optional The agency performing the measurement. skip_geometry : boolean, optional This option can be used to skip response geometries for each feature. @@ -1798,15 +1798,15 @@ def get_field_measurements_metadata( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location, in ``AGENCY-ID`` form (e.g. ``"USGS-02238500"``). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional 5-digit parameter code. See https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - parameter_name : string or list of strings, optional + parameter_name : string or iterable of strings, optional A human-understandable name corresponding to ``parameter_code``. - parameter_description : string or list of strings, optional + parameter_description : string or iterable of strings, optional A human-readable description of what is being measured. begin, end, last_modified : string, optional Datetime fields that accept either an RFC 3339 datetime, an @@ -1913,16 +1913,16 @@ def get_peaks( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location, in ``AGENCY-ID`` form (e.g. ``"USGS-02238500"``). - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional 5-digit parameter code. Most peaks records are ``"00060"`` (discharge) or ``"00065"`` (stage / gage height). Full list at https://help.waterdata.usgs.gov/codes-and-parameters/parameters. - time_series_id : string or list of strings, optional + time_series_id : string or iterable of strings, optional ID of the time series the peak belongs to. - unit_of_measure : string or list of strings, optional + unit_of_measure : string or iterable of strings, optional Human-readable units (e.g. ``"ft^3/s"``, ``"ft"``). time : string, optional Datetime, interval, or duration filter on the peak's date. @@ -2168,7 +2168,7 @@ def get_samples( "actgroup", "count" projects - "project", "projectmonitoringlocationweight" organizations - "organization", "count" - activityMediaName : string or list of strings, optional + activityMediaName : string or iterable of strings, optional Name or code indicating environmental medium in which sample was taken. Check the `activityMediaName_lookup()` function in this module for all possible inputs. @@ -2183,20 +2183,20 @@ def get_samples( The logic is inclusive, i.e. it will also return results that match the date. If left as None, will pull all data after activityStartDateLower up to the most recent available results. - activityTypeCode : string or list of strings, optional + activityTypeCode : string or iterable of strings, optional Text code that describes type of field activity performed. Example: "Sample-Routine, regular". - characteristicGroup : string or list of strings, optional + characteristicGroup : string or iterable of strings, optional Characteristic group is a broad category of characteristics describing one or more results. Check the `characteristicGroup_lookup()` function in this module for all possible inputs. Example: "Organics, PFAS" - characteristic : string or list of strings, optional + characteristic : string or iterable of strings, optional Characteristic is a specific category describing one or more results. Check the `characteristic_lookup()` function in this module for all possible inputs. Example: "Suspended Sediment Discharge" - characteristicUserSupplied : string or list of strings, optional + characteristicUserSupplied : string or iterable of strings, optional A user supplied characteristic name describing one or more results. boundingBox: list of four floats, optional Filters on the the associated monitoring location's point location @@ -2212,39 +2212,39 @@ def get_samples( * Northern-most longitude Example: [-92.8,44.2,-88.9,46.0] - countryFips : string or list of strings, optional + countryFips : string or iterable of strings, optional Example: "US" (United States) - stateFips : string or list of strings, optional + stateFips : string or iterable of strings, optional Check the `stateFips_lookup()` function in this module for all possible inputs. Example: "US:15" (United States: Hawaii) - countyFips : string or list of strings, optional + countyFips : string or iterable of strings, optional Check the `countyFips_lookup()` function in this module for all possible inputs. Example: "US:15:001" (United States: Hawaii, Hawaii County) - siteTypeCode : string or list of strings, optional + siteTypeCode : string or iterable of strings, optional An abbreviation for a certain site type. Check the `siteType_lookup()` function in this module for all possible inputs. Example: "GW" (Groundwater site) - siteTypeName : string or list of strings, optional + siteTypeName : string or iterable of strings, optional A full name for a certain site type. Check the `siteType_lookup()` function in this module for all possible inputs. Example: "Well" - usgsPCode : string or list of strings, optional + usgsPCode : string or iterable of strings, optional 5-digit number used in the US Geological Survey computerized data system, National Water Information System (NWIS), to uniquely identify a specific constituent. Check the `characteristic_lookup()` function in this module for all possible inputs. Example: "00060" (Discharge, cubic feet per second) - hydrologicUnit : string or list of strings, optional + hydrologicUnit : string or iterable of strings, optional Max 12-digit number used to describe a hydrologic unit. Example: "070900020502" - monitoringLocationIdentifier : string or list of strings, optional + monitoringLocationIdentifier : string or iterable of strings, optional A monitoring location identifier has two parts: the agency code and the location number, separated by a dash (-). Example: "USGS-040851385" - organizationIdentifier : string or list of strings, optional + organizationIdentifier : string or iterable of strings, optional Designator used to uniquely identify a specific organization. Currently only accepting the organization "USGS". pointLocationLatitude : float, optional @@ -2256,11 +2256,11 @@ def get_samples( pointLocationWithinMiles : float, optional Radius for a point/radius query. Must be used with pointLocationLatitude and pointLocationLongitude - projectIdentifier : string or list of strings, optional + projectIdentifier : string or iterable of strings, optional Designator used to uniquely identify a data collection project. Project identifiers are specific to an organization (e.g. USGS). Example: "ZH003QW03" - recordIdentifierUserSupplied : string or list of strings, optional + recordIdentifierUserSupplied : string or iterable of strings, optional Internal AQS record identifier that returns 1 entry. Only available for the "results" service. @@ -2458,7 +2458,7 @@ def get_stats_por( Start day for the query in the month-day format (MM-DD). end_date: string or datetime, optional End day for the query in the month-day format (MM-DD). - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the @@ -2478,7 +2478,7 @@ def get_stats_por( Example: "GW" (Groundwater site) site_type_name: string, optional Site type name query parameter. - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at @@ -2583,7 +2583,7 @@ def get_stats_date_range( end_date: string or datetime, optional End date for the query in the year-month-day format (YYYY-MM-DD). - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the @@ -2606,7 +2606,7 @@ def get_stats_date_range( You can see a list of valid site type names here: https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. Example: "Well" - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at @@ -2701,20 +2701,20 @@ def get_channel( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional A unique identifier representing a single monitoring location. This corresponds to the id field in the monitoring-locations endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). - field_visit_id : string or list of strings, optional + field_visit_id : string or iterable of strings, optional A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit. - measurement_number : string or list of strings, optional + measurement_number : string or iterable of strings, optional Measurement number. - time : string or list of strings, optional + time : string or iterable of strings, optional The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots @@ -2731,39 +2731,39 @@ def get_channel( "../2018-03-18T12:31:12Z" * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours - channel_name : string or list of strings, optional + channel_name : string or iterable of strings, optional The channel name. - channel_flow : string or list of strings, optional + channel_flow : string or iterable of strings, optional The units for channel discharge. - channel_width : string or list of strings, optional + channel_width : string or iterable of strings, optional The channel width. - channel_width_unit : string or list of strings, optional + channel_width_unit : string or iterable of strings, optional The units for channel width. - channel_area : string or list of strings, optional + channel_area : string or iterable of strings, optional The channel area. - channel_area_unit : string or list of strings, optional + channel_area_unit : string or iterable of strings, optional The units for channel area. - channel_velocity : string or list of strings, optional + channel_velocity : string or iterable of strings, optional The mean channel velocity. - channel_velocity_unit : string or list of strings, optional + channel_velocity_unit : string or iterable of strings, optional The units for channel velocity. - channel_location_distance : string or list of strings, optional + channel_location_distance : string or iterable of strings, optional The channel location distance. - channel_location_distance_unit : string or list of strings, optional + channel_location_distance_unit : string or iterable of strings, optional The units for channel location distance. - channel_stability : string or list of strings, optional + channel_stability : string or iterable of strings, optional The stability of the channel material. - channel_material : string or list of strings, optional + channel_material : string or iterable of strings, optional The channel material. - channel_evenness : string or list of strings, optional + channel_evenness : string or iterable of strings, optional The channel evenness from bank to bank. - horizontal_velocity_description : string or list of strings, optional + horizontal_velocity_description : string or iterable of strings, optional The horizontal velocity description. - vertical_velocity_description : string or list of strings, optional + vertical_velocity_description : string or iterable of strings, optional The vertical velocity description. - longitudinal_velocity_description : string or list of strings, optional + longitudinal_velocity_description : string or iterable of strings, optional The longitudinal velocity description. - measurement_type : string or list of strings, optional + measurement_type : string or iterable of strings, optional The measurement type. The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate @@ -2787,7 +2787,7 @@ def get_channel( The returning object will be a data frame with no spatial information. Note that the USGS Water Data APIs use camelCase "skipGeometry" in CQL2 queries. - channel_measurement_type : string or list of strings, optional + channel_measurement_type : string or iterable of strings, optional The channel measurement type. properties : string or list of strings, optional A vector of requested columns to be returned from the query. Available From 0cf981eff0e661215ba90a994b282821956fb564 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 14:59:02 -0500 Subject: [PATCH 19/21] Widen properties annotation to Iterable[str] for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit kept `properties: str | list[str] | None` based on a wrong premise — that `",".join(...)` downstream "requires a list." It doesn't: `_get_args` runs `_normalize_str_iterable` for properties exactly like every other multi-value string filter, materializing pd.Series, np.ndarray, generators, and tuples into a list before any downstream code sees them. The stress test already proved all five iterable shapes work at runtime. Bring the annotation and docstring in line with the others: 11 signatures (`str | list[str] | None` -> `str | Iterable[str] | None`) and 11 docstring lines ("string or list of strings" -> "string or iterable of strings"). --- dataretrieval/waterdata/api.py | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 17a25eb5..ad268194 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -42,7 +42,7 @@ def get_daily( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, time_series_id: str | Iterable[str] | None = None, daily_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -90,7 +90,7 @@ def get_daily( Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, @@ -244,7 +244,7 @@ def get_continuous( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, time_series_id: str | Iterable[str] | None = None, continuous_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -297,7 +297,7 @@ def get_continuous( typically return no results. A complete list of codes and their descriptions can be found at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, @@ -469,7 +469,7 @@ def get_monitoring_locations( well_constructed_depth: str | Iterable[str] | None = None, hole_constructed_depth: str | Iterable[str] | None = None, depth_source_code: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, time: str | Iterable[str] | None = None, bbox: list[float] | None = None, @@ -650,7 +650,7 @@ def get_monitoring_locations( A code indicating the source of water-level data. A `list of codes `_ is available. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, @@ -730,7 +730,7 @@ def get_time_series_metadata( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, parameter_name: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, hydrologic_unit_code: str | Iterable[str] | None = None, state_name: str | Iterable[str] | None = None, @@ -779,7 +779,7 @@ def get_time_series_metadata( https://help.waterdata.usgs.gov/codes-and-parameters/parameters. parameter_name : string or iterable of strings, optional A human-understandable name corresponding to parameter_code. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, @@ -1005,7 +1005,7 @@ def get_combined_metadata( well_constructed_depth: str | Iterable[str] | None = None, hole_constructed_depth: str | Iterable[str] | None = None, depth_source_code: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -1091,7 +1091,7 @@ def get_combined_metadata( altitude, vertical/horizontal datum, drainage area, aquifer, well construction, …); see :func:`get_monitoring_locations` for descriptions of each. - properties : string or list of strings, optional + properties : string or iterable of strings, optional Subset of columns to return. Defaults to every available property. skip_geometry : boolean, optional @@ -1185,7 +1185,7 @@ def get_latest_continuous( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, time_series_id: str | Iterable[str] | None = None, latest_continuous_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -1231,7 +1231,7 @@ def get_latest_continuous( Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, @@ -1380,7 +1380,7 @@ def get_latest_daily( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, statistic_id: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, time_series_id: str | Iterable[str] | None = None, latest_daily_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, @@ -1428,7 +1428,7 @@ def get_latest_daily( Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, @@ -1576,7 +1576,7 @@ def get_field_measurements( monitoring_location_id: str | Iterable[str] | None = None, parameter_code: str | Iterable[str] | None = None, observing_procedure_code: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, field_visit_id: str | Iterable[str] | None = None, approval_status: str | Iterable[str] | None = None, unit_of_measure: str | Iterable[str] | None = None, @@ -1618,7 +1618,7 @@ def get_field_measurements( observing_procedure_code : string or iterable of strings, optional A short code corresponding to the observing procedure for the field measurement. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, @@ -1770,7 +1770,7 @@ def get_field_measurements_metadata( begin: str | Iterable[str] | None = None, end: str | Iterable[str] | None = None, last_modified: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -1813,7 +1813,7 @@ def get_field_measurements_metadata( interval (``"start/end"``, optionally half-bounded with ``..``), or an ISO 8601 duration (e.g. ``"P1M"``, ``"PT36H"``). See :func:`get_time_series_metadata` for the full grammar. - properties : string or list of strings, optional + properties : string or iterable of strings, optional Subset of columns to return. Defaults to every available property. skip_geometry : boolean, optional Skip per-feature geometries; the returned object will be a plain @@ -1891,7 +1891,7 @@ def get_peaks( month: int | list[int] | None = None, day: int | list[int] | None = None, peak_since: int | list[int] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -1937,7 +1937,7 @@ def get_peaks( Filter on the year since which the peak value has stood as the record (the API serves this field as an integer; many rows are ``null``). - properties : string or list of strings, optional + properties : string or iterable of strings, optional Subset of columns to return. Defaults to every available property. skip_geometry : boolean, optional Skip per-feature geometries; the returned object will be a plain @@ -2688,7 +2688,7 @@ def get_channel( measurement_type: str | Iterable[str] | None = None, last_modified: str | Iterable[str] | None = None, channel_measurement_type: str | Iterable[str] | None = None, - properties: str | list[str] | None = None, + properties: str | Iterable[str] | None = None, skip_geometry: bool | None = None, bbox: list[float] | None = None, limit: int | None = None, @@ -2789,7 +2789,7 @@ def get_channel( CQL2 queries. channel_measurement_type : string or iterable of strings, optional The channel measurement type. - properties : string or list of strings, optional + properties : string or iterable of strings, optional A vector of requested columns to be returned from the query. Available options are: geometry, channel_measurements_id, monitoring_location_id, field_visit_id, measurement_number, time, channel_name, channel_flow, From b0f2289e44a668ff1ab35515b893ea717d670405 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 15:03:20 -0500 Subject: [PATCH 20/21] Validate monitoring_location_id in get_ratings and widen annotations in nearest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot found that `get_ratings` accepts `monitoring_location_id` and documents the same AGENCY-ID contract, but builds its STAC filter directly without routing through `_get_args` — so the centralized validation never ran. Call `_check_monitoring_location_id` at the top of `get_ratings` and widen the annotation/docstring to `Iterable[str]` for consistency. `get_nearest_continuous` inherits validation via its forwarded call to `get_continuous`, so its behavior is already correct — but its annotation and docstring still advertised `list[str]`. Widen both for parity. --- dataretrieval/waterdata/nearest.py | 9 +++++---- dataretrieval/waterdata/ratings.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py index 29484875..12aad61c 100644 --- a/dataretrieval/waterdata/nearest.py +++ b/dataretrieval/waterdata/nearest.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Iterable from typing import Literal, get_args import pandas as pd @@ -18,8 +19,8 @@ def get_nearest_continuous( targets, - monitoring_location_id: str | list[str] | None = None, - parameter_code: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, + parameter_code: str | Iterable[str] | None = None, *, window: str | pd.Timedelta = "PT7M30S", on_tie: OnTie = "first", @@ -44,9 +45,9 @@ def get_nearest_continuous( Target timestamps. Naive datetimes are treated as UTC. Accepts a list, ``pandas.Series``, ``pandas.DatetimeIndex``, ``numpy.ndarray``, or anything ``pandas.to_datetime`` consumes. - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional Forwarded to ``get_continuous``. - parameter_code : string or list of strings, optional + parameter_code : string or iterable of strings, optional Forwarded to ``get_continuous``. window : string or ``pandas.Timedelta``, default ``"PT7M30S"`` Half-window around each target, as an ISO 8601 duration diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py index f5a1a0ff..a37c88b5 100644 --- a/dataretrieval/waterdata/ratings.py +++ b/dataretrieval/waterdata/ratings.py @@ -22,7 +22,13 @@ from dataretrieval.rdb import extract_rdb_comment, read_rdb -from .utils import _DURATION_RE, BASE_URL, _default_headers, _format_api_dates +from .utils import ( + _DURATION_RE, + BASE_URL, + _check_monitoring_location_id, + _default_headers, + _format_api_dates, +) logger = logging.getLogger(__name__) @@ -33,7 +39,7 @@ def get_ratings( - monitoring_location_id: str | list[str] | None = None, + monitoring_location_id: str | Iterable[str] | None = None, file_type: RATING_FILE_TYPE | list[RATING_FILE_TYPE] = "exsa", file_path: str | None = None, time: str | list[str] | None = None, @@ -62,7 +68,7 @@ def get_ratings( Parameters ---------- - monitoring_location_id : string or list of strings, optional + monitoring_location_id : string or iterable of strings, optional One or more identifiers in ``AGENCY-ID`` form (e.g. ``"USGS-01104475"``). If omitted, the spatial / temporal filters determine the result set. @@ -142,6 +148,7 @@ def get_ratings( ... ) """ + monitoring_location_id = _check_monitoring_location_id(monitoring_location_id) file_types = _as_list(file_type) invalid = [ft for ft in file_types if ft not in _VALID_FILE_TYPES] if invalid: From 007b76be951ba2c229bb3e0a74b3a707880e799e Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 13 May 2026 15:07:28 -0500 Subject: [PATCH 21/21] Drop dead `monitoring_location_id` entry from _NO_NORMALIZE_PARAMS; trim _check branching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit finds: 1. `monitoring_location_id` was listed in `_NO_NORMALIZE_PARAMS` from when the AGENCY-ID check lived in each public function. After the centralization commit (42852a8), `_get_args` dispatches it via its own `if k == "monitoring_location_id":` branch BEFORE the `_NO_NORMALIZE_PARAMS` check, so the entry is dead code. Remove it and tighten the comment to list only the params that actually need the bypass (date-range, bbox/boundingBox, get_peaks's int filters, get_combined_metadata's thresholds). 2. `_check_monitoring_location_id` had a two-arm if/else that called `_check_id_format` once for the str case and in a loop for the iterable case. Replaced with a one-element-tuple wrap so a single loop covers both shapes — same logic, less code. Confirmed coverage: 15 public functions accept `monitoring_location_id` (12 in api.py, get_ratings in ratings.py, get_nearest_continuous in nearest.py); each rejects bad input client-side. 5 `list[int]` filters (`water_year`, `year`, `month`, `day`, `peak_since`), `bbox`, `boundingBox`, `thresholds`, and 7 date-range params all bypass string-iterable normalization correctly. --- dataretrieval/waterdata/utils.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 17fd370b..018e1c85 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1188,15 +1188,14 @@ def _check_profiles( _MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+") -# Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``. -# Scalar non-string knobs are detected by runtime type; only iterable-shaped -# params with special handling need to be named here: -# - ``monitoring_location_id`` is validated separately (AGENCY-ID format) +# Iterable-shaped params that ``_get_args`` must NOT push through +# ``_normalize_str_iterable`` (scalar non-string knobs are caught by runtime +# type, so only iterables with special handling need to be named here): # - date-range params may contain ``pd.NaT``/None or interval strings # - ``bbox``/``boundingBox`` are ``list[float]``, sometimes ``numpy.ndarray`` # - ``get_peaks``'s int-valued filters (``water_year`` etc.) are ``list[int]`` +# - ``get_combined_metadata``'s ``thresholds`` is ``list[float]`` _NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | { - "monitoring_location_id", "bbox", "boundingBox", "water_year", @@ -1295,11 +1294,8 @@ def _check_monitoring_location_id( ) from None if value is None: return None - if isinstance(value, str): - _check_id_format(value) - else: - for v in value: - _check_id_format(v) + for item in (value,) if isinstance(value, str) else value: + _check_id_format(item) return value