From d183bb5ec7403ff4ab34c6c7c2e314e3a35b0652 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 12:46:46 -0500 Subject: [PATCH 1/3] Fix _validate_data_source: validate on every call, not just cache miss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The validation check `data_source not in _AVAILABLE_DATA_SOURCES` was nested inside `if _AVAILABLE_DATA_SOURCES is None:`, so once any caller warmed the cache, subsequent invalid `data_source`/`feature_source` values were silently accepted. Move the check outside the cache-load branch so all calls are validated. Also guard the `_validate_data_source(feature_source)` call in `get_features` with `if feature_source:` — the previous code unconditionally validated `None` when the user provided only `comid`. The cache bug masked this; with the check now active it would otherwise raise "Invalid data source 'None'". Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/nldi.py | 16 +++++++++------- tests/nldi_test.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index 8cb8f0aa..d613cf9a 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -259,7 +259,8 @@ def get_features( if data_source: _validate_data_source(data_source) # validate feature source - _validate_data_source(feature_source) + if feature_source: + _validate_data_source(feature_source) # validate the navigation mode if navigation_mode: _validate_navigation_mode(navigation_mode) @@ -475,12 +476,13 @@ def _validate_data_source(data_source: str): url, {}, "Error getting available data sources" ) _AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources] - if data_source not in _AVAILABLE_DATA_SOURCES: - err_msg = ( - f"Invalid data source '{data_source}'." - f" Available data sources are: {_AVAILABLE_DATA_SOURCES}" - ) - raise ValueError(err_msg) + + if data_source not in _AVAILABLE_DATA_SOURCES: + err_msg = ( + f"Invalid data source '{data_source}'." + f" Available data sources are: {_AVAILABLE_DATA_SOURCES}" + ) + raise ValueError(err_msg) def _validate_navigation_mode(navigation_mode: str): diff --git a/tests/nldi_test.py b/tests/nldi_test.py index 462a5755..6a222ae1 100644 --- a/tests/nldi_test.py +++ b/tests/nldi_test.py @@ -1,5 +1,7 @@ +import pytest from geopandas import GeoDataFrame +import dataretrieval.nldi as nldi from dataretrieval.nldi import ( NLDI_API_BASE_URL, get_basin, @@ -9,6 +11,14 @@ ) +@pytest.fixture(autouse=True) +def _reset_data_source_cache(): + """Reset the module-level cache between tests.""" + nldi._AVAILABLE_DATA_SOURCES = None + yield + nldi._AVAILABLE_DATA_SOURCES = None + + def mock_request_data_sources(requests_mock): request_url = f"{NLDI_API_BASE_URL}/" available_data_sources = [ @@ -280,3 +290,26 @@ def test_search_for_features_by_lat_long(requests_mock): assert search_results["features"][0]["type"] == "Feature" assert search_results["features"][0]["geometry"]["type"] == "LineString" assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27 + + +def test_validate_data_source_rejects_invalid_after_cache_populated(requests_mock): + """Once the cache is warm, invalid data sources must still raise ValueError. + + Regression: previously the validation check was nested inside the + cache-population branch, so all calls after the first silently passed. + """ + mock_request_data_sources(requests_mock) + + # First call: populates the cache with a valid source. + nldi._validate_data_source("WQP") + + # Second call with an invalid source must raise. + with pytest.raises(ValueError, match="Invalid data source 'not_a_real_source'"): + nldi._validate_data_source("not_a_real_source") + + +def test_validate_data_source_rejects_invalid_on_first_call(requests_mock): + """Cold-cache invalid sources must also raise.""" + mock_request_data_sources(requests_mock) + with pytest.raises(ValueError, match="Invalid data source"): + nldi._validate_data_source("not_a_real_source") From e8438617f8b89d0dbcb3aa7e53e92acaeeac69ac Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:05:12 -0500 Subject: [PATCH 2/3] Use 'is not None' guards so empty-string sources still validate Per copilot review on PR #246. The previous truthiness guard skipped validation for empty-string feature_source/data_source, letting bad inputs build invalid URLs. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/nldi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index d613cf9a..e2897ade 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -256,10 +256,10 @@ def get_features( # validate the feature source and comid _validate_feature_source_comid(feature_source, feature_id, comid) # validate the data source - if data_source: + if data_source is not None: _validate_data_source(data_source) # validate feature source - if feature_source: + if feature_source is not None: _validate_data_source(feature_source) # validate the navigation mode if navigation_mode: From ef62f834087ac603f29f9272d1f0a0ebf895a1e6 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 14:37:22 -0500 Subject: [PATCH 3/3] Tidy nldi data-source validation per /simplify review Per /simplify review on PR #246: - Use `monkeypatch.setattr` in the autouse cache-reset fixture rather than direct attribute assignment + yield. Auto-restores on teardown and collapses the fixture to one line. - Drop redundant cold-cache regression test (`test_validate_data_source_rejects_invalid_on_first_call`); the warm-cache test exercises the cold path on its first call anyway. - Strip stale WHAT-comments (`# validate the data source`, `# validate feature source`, etc.) from the `get_features` validation block while we're touching adjacent lines. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/nldi.py | 4 ---- tests/nldi_test.py | 15 ++------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index 2df9a1d8..eaa0f598 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -253,15 +253,11 @@ def get_features( raise ValueError( "navigation_mode is required if comid or data_source is provided" ) - # validate the feature source and comid _validate_feature_source_comid(feature_source, feature_id, comid) - # validate the data source if data_source is not None: _validate_data_source(data_source) - # validate feature source if feature_source is not None: _validate_data_source(feature_source) - # validate the navigation mode if navigation_mode: _validate_navigation_mode(navigation_mode) diff --git a/tests/nldi_test.py b/tests/nldi_test.py index 6a222ae1..d4f0f0e6 100644 --- a/tests/nldi_test.py +++ b/tests/nldi_test.py @@ -12,11 +12,9 @@ @pytest.fixture(autouse=True) -def _reset_data_source_cache(): +def _reset_data_source_cache(monkeypatch): """Reset the module-level cache between tests.""" - nldi._AVAILABLE_DATA_SOURCES = None - yield - nldi._AVAILABLE_DATA_SOURCES = None + monkeypatch.setattr(nldi, "_AVAILABLE_DATA_SOURCES", None) def mock_request_data_sources(requests_mock): @@ -300,16 +298,7 @@ def test_validate_data_source_rejects_invalid_after_cache_populated(requests_moc """ mock_request_data_sources(requests_mock) - # First call: populates the cache with a valid source. nldi._validate_data_source("WQP") - # Second call with an invalid source must raise. with pytest.raises(ValueError, match="Invalid data source 'not_a_real_source'"): nldi._validate_data_source("not_a_real_source") - - -def test_validate_data_source_rejects_invalid_on_first_call(requests_mock): - """Cold-cache invalid sources must also raise.""" - mock_request_data_sources(requests_mock) - with pytest.raises(ValueError, match="Invalid data source"): - nldi._validate_data_source("not_a_real_source")