From 14a56f4a44a27c6d682f77257f5e81e27617408b Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 13:08:21 -0500
Subject: [PATCH 1/9] Deprecate defunct NWIS functions, update tests, and
 improve 5xx error handling

---
 dataretrieval/nwis.py  |  62 +++++++------
 dataretrieval/utils.py |   6 ++
 tests/nwis_test.py     | 192 +++++++++++++++++++++++------------------
 3 files changed, 151 insertions(+), 109 deletions(-)
diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index f566b19d..b1d0f4ca 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -481,11 +481,20 @@ def get_dv(
     kwargs["multi_index"] = multi_index
 
     response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
-    df = _read_json(response.json())
+    try:
+        df = _read_json(response.json())
+    except Exception as e:
+        if "<html>" in response.text.lower():
+            raise ValueError(
+                "Received HTML response instead of JSON. This often indicates "
+                "that the service is currently unavailable."
+            ) from e
+        raise e
 
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
+
 def get_info(ssl_check: bool = True, **kwargs) -> tuple[pd.DataFrame, BaseMetadata]:
     """
     Get site description information from NWIS.
@@ -667,10 +676,19 @@ def get_iv(
         service="iv", format="json", ssl_check=ssl_check, **kwargs
     )
 
-    df = _read_json(response.json())
+    try:
+        df = _read_json(response.json())
+    except Exception as e:
+        if "<html>" in response.text.lower():
+            raise ValueError(
+                "Received HTML response instead of JSON. This often indicates "
+                "that the service is currently unavailable."
+            ) from e
+        raise e
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
+
 def get_pmcodes(
     parameterCd: str | list[str] = "All",
     partial: bool = True,
@@ -840,11 +858,11 @@ def get_record(
         - 'iv' : instantaneous data
         - 'dv' : daily mean data
         - 'site' : site description
-        - 'measurements' : discharge measurements
+        - 'measurements' : (defunct) use `waterdata.get_field_measurements`
         - 'peaks': discharge peaks
-        - 'gwlevels': groundwater levels
-        - 'pmcodes': get parameter codes
-        - 'water_use': get water use data
+        - 'gwlevels': (defunct) use `waterdata.get_field_measurements`
+        - 'pmcodes': (defunct) use `get_reference_table`
+        - 'water_use': (defunct) defunct
         - 'ratings': get rating table
         - 'stat': get statistics
     ssl_check: bool, optional
@@ -870,29 +888,12 @@ def get_record(
         >>> # Get site description for site 01585200
         >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
 
-        >>> # Get discharge measurements for site 01585200
-        >>> df = dataretrieval.nwis.get_record(
-        ...     sites="01585200", service="measurements"
-        ... )
+        >>> # Get site description for site 01585200
+        >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
 
         >>> # Get discharge peaks for site 01585200
         >>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")
 
-        >>> # Get latest groundwater level for site 434400121275801
-        >>> df = dataretrieval.nwis.get_record(
-        ...     sites="434400121275801", service="gwlevels"
-        ... )
-
-        >>> # Get information about the discharge parameter code
-        >>> df = dataretrieval.nwis.get_record(
-        ...     service="pmcodes", parameterCd="00060"
-        ... )
-
-        >>> # Get water use data for livestock nationally in 2010
-        >>> df = dataretrieval.nwis.get_record(
-        ...     service="water_use", years="2010", categories="L"
-        ... )
-
         >>> # Get rating table for USGS streamgage 01585200
         >>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings")
 
@@ -907,7 +908,8 @@ def get_record(
     """
     _check_sites_value_types(sites)
 
-    if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
+    defunct_services = ["measurements", "gwlevels", "pmcodes", "water_use"]
+    if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES + defunct_services:
         raise TypeError(f"Unrecognized service: {service}")
 
     if service == "iv":
@@ -1235,4 +1237,10 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
     def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
         # define variable_info metadata based on parameterCd if available
         if "parameterCd" in self._parameters:
-            return get_pmcodes(parameterCd=self._parameters["parameterCd"])
+            warnings.warn(
+                "Accessing variable_info via NWIS_Metadata is deprecated as "
+                "it relies on the defunct get_pmcodes function.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return None
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 338238b5..a62433a5 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -212,6 +212,12 @@ def query(url, payload, delimiter=",", ssl_check=True):
             + f"API response reason: {_reason}. Pseudo-code example of how to "
             + f"split your query: \n {_example}"
         )
+    elif response.status_code in [500, 502, 503]:
+        raise ValueError(
+            f"Service Unavailable: {response.status_code} {response.reason}. "
+            + f"The service at {response.url} may be down or experiencing issues."
+        )
+
 
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index c0f4d4b2..82dd4e1c 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -7,9 +7,14 @@
 
 from dataretrieval.nwis import (
     NWIS_Metadata,
+    get_discharge_measurements,
+    get_gwlevels,
     get_info,
     get_iv,
+    get_pmcodes,
+    get_qwdata,
     get_record,
+    get_water_use,
     preformat_peaks_response,
     what_sites,
 )
@@ -21,17 +26,43 @@
 SITENO_COL = "site_no"
 
 
-def test_iv_service():
-    """Unit test of instantaneous value service"""
+def _test_iv_service(requests_mock):
+    """Mocked test of instantaneous value service"""
     start = START_DATE
     end = END_DATE
     service = "iv"
     site = ["03339000", "05447500", "03346500"]
+    
+    # Minimal mock response
+    mock_url = (
+        "https://waterservices.usgs.gov/nwis/iv?format=json&"
+        f"startDT={start}&endDT={end}&sites=03339000%2C05447500%2C03346500"
+    )
+    # We use a very simple JSON structure just to satisfy the parser
+    mock_json = {
+        "value": {
+            "timeSeries": [
+                {
+                    "sourceInfo": {"siteCode": [{"value": "03339000"}]},
+                    "variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
+                    "values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "1.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
+                },
+                {
+                    "sourceInfo": {"siteCode": [{"value": "05447500"}]},
+                    "variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
+                    "values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "2.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
+                }
+            ]
+        }
+    }
+
+    requests_mock.get(mock_url, json=mock_json)
+    
     return get_record(site, start, end, service=service)
 
 
-def test_iv_service_answer():
-    df = test_iv_service()
+def test_iv_service_answer(requests_mock):
+    df = _test_iv_service(requests_mock)
     # check multiindex function
     assert df.index.names == [
         SITENO_COL,
@@ -39,6 +70,25 @@ def test_iv_service_answer():
     ], f"iv service returned incorrect index: {df.index.names}"
 
 
+def test_nwis_service_live():
+    """Live sanity check of NWIS service, tolerant of 502/503."""
+    site = "01491000"
+    try:
+        # Minimal query: just most recent record
+        get_iv(sites=site)
+    except ValueError as e:
+        # Catch our custom 5xx error from utils.py
+        if any(err in str(e) for err in ["502", "503", "Service Unavailable"]):
+            pytest.skip(f"Service is currently unavailable (transient 502/503): {e}")
+        raise e
+    except Exception as e:
+        # Fallback for other potential transient network issues
+        if "Expecting value" in str(e) or "JSON" in str(e):
+            pytest.skip(f"Service returned invalid response (likely 502/503): {e}")
+        raise e
+
+
+
 def test_preformat_peaks_response():
     # make a data frame with a "peak_dt" datetime column
     # it will have some nan and none values
@@ -61,82 +111,48 @@ def test_preformat_peaks_response():
 # incomplete date-time information
 
 
-@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
-def test_inc_date_01():
-    """Test based on GitHub Issue #47 - lack of timestamp for measurement."""
-    site = "403451073585601"
-    # make call expecting a warning to be thrown due to incomplete dates
-    with pytest.warns(UserWarning) as record:
-        df = get_record(site, "1980-01-01", "1990-01-01", service="gwlevels")
-
-    if len(df) == 0:
-        pytest.skip(f"Site {site} returned no data.")
-
-    assert len(record) > 0
-    # assert that there are indeed incomplete dates
-    assert pd.isna(df.index).any()
-    # assert that the datetime index is there
-    assert df.index.name == "datetime"
-    # make call without defining a datetime index and check that it isn't there
-    df2 = get_record(
-        site, "1980-01-01", "1990-01-01", service="gwlevels", datetime_index=False
-    )
-    # assert shape of both dataframes is the same (contain the same data)
-    assert df.shape == df2.shape
-    # assert that the datetime index is not there
-    assert df2.index.name != "datetime"
-
-
-@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
-def test_inc_date_02():
-    """Test based on GitHub Issue #47 - lack of month, day, or time."""
-    site = "180049066381200"
-    # make call expecting a warning to be thrown due to incomplete dates
-    with pytest.warns(UserWarning) as record:
-        df = get_record(site, "1900-01-01", "2013-01-01", service="gwlevels")
-
-    if len(df) == 0:
-        pytest.skip(f"Site {site} returned no data.")
-
-    assert len(record) > 0
-    # assert that there are indeed incomplete dates
-    assert pd.isna(df.index).any()
-    # assert that the datetime index is there
-    assert df.index.name == "datetime"
-    # make call without defining a datetime index and check that it isn't there
-    df2 = get_record(
-        site, "1900-01-01", "2013-01-01", service="gwlevels", datetime_index=False
-    )
-    # assert shape of both dataframes is the same (contain the same data)
-    assert df.shape == df2.shape
-    # assert that the datetime index is not there
-    assert df2.index.name != "datetime"
-
-
-@pytest.mark.xfail(reason="Modern service does not return incomplete dates.")
-def test_inc_date_03():
-    """Test based on GitHub Issue #47 - lack of day, and times."""
-    site = "290000095192602"
-    # make call expecting a warning to be thrown due to incomplete dates
-    with pytest.warns(UserWarning) as record:
-        df = get_record(site, "1975-01-01", "2000-01-01", service="gwlevels")
-
-    if len(df) == 0:
-        pytest.skip(f"Site {site} returned no data.")
-
-    assert len(record) > 0
-    # assert that there are indeed incomplete dates
-    assert pd.isna(df.index).any()
-    # assert that the datetime index is there
-    assert df.index.name == "datetime"
-    # make call without defining a datetime index and check that it isn't there
-    df2 = get_record(
-        site, "1975-01-01", "2000-01-01", service="gwlevels", datetime_index=False
-    )
-    # assert shape of both dataframes is the same (contain the same data)
-    assert df.shape == df2.shape
-    # assert that the datetime index is not there
-    assert df2.index.name != "datetime"
+# Removed defunct gwlevels tests.
+
+
+class TestDefunct:
+    """Verify that defunct functions raise NameError."""
+
+    def test_get_qwdata_raises(self):
+        with pytest.raises(NameError, match="get_qwdata"):
+            get_qwdata()
+
+    def test_get_discharge_measurements_raises(self):
+        with pytest.raises(NameError, match="get_discharge_measurements"):
+            get_discharge_measurements()
+
+    def test_get_gwlevels_raises(self):
+        with pytest.raises(NameError, match="get_gwlevels"):
+            get_gwlevels()
+
+    def test_get_pmcodes_raises(self):
+        with pytest.raises(NameError, match="get_pmcodes"):
+            get_pmcodes()
+
+    def test_get_water_use_raises(self):
+        with pytest.raises(NameError, match="get_water_use"):
+            get_water_use()
+
+    def test_get_record_defunct_service_measurements(self):
+        with pytest.raises(NameError, match="get_discharge_measurements"):
+            get_record(service="measurements")
+
+    def test_get_record_defunct_service_gwlevels(self):
+        with pytest.raises(NameError, match="get_gwlevels"):
+            get_record(service="gwlevels")
+
+    def test_get_record_defunct_service_pmcodes(self):
+        with pytest.raises(NameError, match="get_pmcodes"):
+            get_record(service="pmcodes")
+
+    def test_get_record_defunct_service_water_use(self):
+        with pytest.raises(NameError, match="get_water_use"):
+            get_record(service="water_use")
+
 
 
 class TestTZ:
@@ -211,14 +227,26 @@ def test_expandedrdb_get_info(self):
         assert "count_nu" not in data.columns
 
 
-def test_empty_timeseries():
+def test_empty_timeseries(requests_mock):
     """Test based on empty case from GitHub Issue #26."""
+    sites = "011277906"
+    start = "2010-07-20"
+    end = "2010-07-20"
+    
+    mock_url = (
+        f"https://waterservices.usgs.gov/nwis/iv?format=json&"
+        f"startDT={start}&endDT={end}&sites={sites}"
+    )
+    mock_json = {"value": {"timeSeries": []}}
+    requests_mock.get(mock_url, json=mock_json)
+    
     df = get_record(
-        sites="011277906", service="iv", start="2010-07-20", end="2010-07-20"
+        sites=sites, service="iv", start=start, end=end
     )
     assert df.empty is True
 
 
+
 class TestMetaData:
     """Tests of NWIS metadata setting,
 

From fd013ff5fcbfe8f0ca2189663f7212ad6dba8e7b Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 13:20:25 -0500
Subject: [PATCH 2/9] Ruff

---
 dataretrieval/nwis.py  |  2 --
 dataretrieval/utils.py |  1 -
 tests/nwis_test.py     | 53 ++++++++++++++++++++++++++++++------------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index b1d0f4ca..54d61461 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -494,7 +494,6 @@ def get_dv(
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
-
 def get_info(ssl_check: bool = True, **kwargs) -> tuple[pd.DataFrame, BaseMetadata]:
     """
     Get site description information from NWIS.
@@ -688,7 +687,6 @@ def get_iv(
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
-
 def get_pmcodes(
     parameterCd: str | list[str] = "All",
     partial: bool = True,
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index a62433a5..87811e6f 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -218,7 +218,6 @@ def query(url, payload, delimiter=",", ssl_check=True):
             + f"The service at {response.url} may be down or experiencing issues."
         )
 
-
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
 
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 82dd4e1c..156ac85e 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -32,7 +32,7 @@ def _test_iv_service(requests_mock):
     end = END_DATE
     service = "iv"
     site = ["03339000", "05447500", "03346500"]
-    
+
     # Minimal mock response
     mock_url = (
         "https://waterservices.usgs.gov/nwis/iv?format=json&"
@@ -44,20 +44,48 @@ def _test_iv_service(requests_mock):
             "timeSeries": [
                 {
                     "sourceInfo": {"siteCode": [{"value": "03339000"}]},
-                    "variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
-                    "values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "1.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
+                    "variable": {
+                        "variableCode": [{"value": "00060"}],
+                        "options": {"option": [{"value": "mean"}]},
+                    },
+                    "values": [
+                        {
+                            "method": [{"methodDescription": "mean"}],
+                            "value": [
+                                {
+                                    "value": "1.0",
+                                    "dateTime": "2018-01-24T00:00:00Z",
+                                    "qualifiers": "A",
+                                }
+                            ],
+                        }
+                    ],
                 },
                 {
                     "sourceInfo": {"siteCode": [{"value": "05447500"}]},
-                    "variable": {"variableCode": [{"value": "00060"}], "options": {"option": [{"value": "mean"}]}},
-                    "values": [{"method": [{"methodDescription": "mean"}], "value": [{"value": "2.0", "dateTime": "2018-01-24T00:00:00Z", "qualifiers": "A"}]}]
-                }
+                    "variable": {
+                        "variableCode": [{"value": "00060"}],
+                        "options": {"option": [{"value": "mean"}]},
+                    },
+                    "values": [
+                        {
+                            "method": [{"methodDescription": "mean"}],
+                            "value": [
+                                {
+                                    "value": "2.0",
+                                    "dateTime": "2018-01-24T00:00:00Z",
+                                    "qualifiers": "A",
+                                }
+                            ],
+                        }
+                    ],
+                },
             ]
         }
     }
 
     requests_mock.get(mock_url, json=mock_json)
-    
+
     return get_record(site, start, end, service=service)
 
 
@@ -88,7 +116,6 @@ def test_nwis_service_live():
         raise e
 
 
-
 def test_preformat_peaks_response():
     # make a data frame with a "peak_dt" datetime column
     # it will have some nan and none values
@@ -154,7 +181,6 @@ def test_get_record_defunct_service_water_use(self):
             get_record(service="water_use")
 
 
-
 class TestTZ:
     """Tests relating to GitHub Issue #60."""
 
@@ -232,19 +258,16 @@ def test_empty_timeseries(requests_mock):
     sites = "011277906"
     start = "2010-07-20"
     end = "2010-07-20"
-    
+
     mock_url = (
         f"https://waterservices.usgs.gov/nwis/iv?format=json&"
         f"startDT={start}&endDT={end}&sites={sites}"
     )
     mock_json = {"value": {"timeSeries": []}}
     requests_mock.get(mock_url, json=mock_json)
-    
-    df = get_record(
-        sites=sites, service="iv", start=start, end=end
-    )
-    assert df.empty is True
 
+    df = get_record(sites=sites, service="iv", start=start, end=end)
+    assert df.empty is True
 
 
 class TestMetaData:

From 5cf53661cb9f3f519c768f6ec5ed243dcf6d26ee Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 13:23:40 -0500
Subject: [PATCH 3/9] Refactor NWIS tests to use external JSON mock data

---
 tests/data/nwis_iv_empty_mock.json |  5 ++
 tests/data/nwis_iv_mock.json       | 84 ++++++++++++++++++++++++++++++
 tests/nwis_test.py                 | 54 ++++---------------
 3 files changed, 98 insertions(+), 45 deletions(-)
 create mode 100644 tests/data/nwis_iv_empty_mock.json
 create mode 100644 tests/data/nwis_iv_mock.json

diff --git a/tests/data/nwis_iv_empty_mock.json b/tests/data/nwis_iv_empty_mock.json
new file mode 100644
index 00000000..7b69d6d5
--- /dev/null
+++ b/tests/data/nwis_iv_empty_mock.json
@@ -0,0 +1,5 @@
+{
+    "value": {
+        "timeSeries": []
+    }
+}
\ No newline at end of file
diff --git a/tests/data/nwis_iv_mock.json b/tests/data/nwis_iv_mock.json
new file mode 100644
index 00000000..5c84f9f8
--- /dev/null
+++ b/tests/data/nwis_iv_mock.json
@@ -0,0 +1,84 @@
+{
+    "value": {
+        "timeSeries": [
+            {
+                "sourceInfo": {
+                    "siteCode": [
+                        {
+                            "value": "03339000"
+                        }
+                    ]
+                },
+                "variable": {
+                    "variableCode": [
+                        {
+                            "value": "00060"
+                        }
+                    ],
+                    "options": {
+                        "option": [
+                            {
+                                "value": "mean"
+                            }
+                        ]
+                    }
+                },
+                "values": [
+                    {
+                        "method": [
+                            {
+                                "methodDescription": "mean"
+                            }
+                        ],
+                        "value": [
+                            {
+                                "value": "1.0",
+                                "dateTime": "2018-01-24T00:00:00Z",
+                                "qualifiers": "A"
+                            }
+                        ]
+                    }
+                ]
+            },
+            {
+                "sourceInfo": {
+                    "siteCode": [
+                        {
+                            "value": "05447500"
+                        }
+                    ]
+                },
+                "variable": {
+                    "variableCode": [
+                        {
+                            "value": "00060"
+                        }
+                    ],
+                    "options": {
+                        "option": [
+                            {
+                                "value": "mean"
+                            }
+                        ]
+                    }
+                },
+                "values": [
+                    {
+                        "method": [
+                            {
+                                "methodDescription": "mean"
+                            }
+                        ],
+                        "value": [
+                            {
+                                "value": "2.0",
+                                "dateTime": "2018-01-24T00:00:00Z",
+                                "qualifiers": "A"
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 156ac85e..3a3c1175 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -1,4 +1,5 @@
 import datetime
+import json
 from unittest import mock
 
 import numpy as np
@@ -26,6 +27,12 @@
 SITENO_COL = "site_no"
 
 
+def _load_mock_json(file_name):
+    """Helper to load mock JSON from tests/data."""
+    with open(f"tests/data/{file_name}") as f:
+        return json.load(f)
+
+
 def _test_iv_service(requests_mock):
     """Mocked test of instantaneous value service"""
     start = START_DATE
@@ -39,50 +46,7 @@ def _test_iv_service(requests_mock):
         f"startDT={start}&endDT={end}&sites=03339000%2C05447500%2C03346500"
     )
     # We use a very simple JSON structure just to satisfy the parser
-    mock_json = {
-        "value": {
-            "timeSeries": [
-                {
-                    "sourceInfo": {"siteCode": [{"value": "03339000"}]},
-                    "variable": {
-                        "variableCode": [{"value": "00060"}],
-                        "options": {"option": [{"value": "mean"}]},
-                    },
-                    "values": [
-                        {
-                            "method": [{"methodDescription": "mean"}],
-                            "value": [
-                                {
-                                    "value": "1.0",
-                                    "dateTime": "2018-01-24T00:00:00Z",
-                                    "qualifiers": "A",
-                                }
-                            ],
-                        }
-                    ],
-                },
-                {
-                    "sourceInfo": {"siteCode": [{"value": "05447500"}]},
-                    "variable": {
-                        "variableCode": [{"value": "00060"}],
-                        "options": {"option": [{"value": "mean"}]},
-                    },
-                    "values": [
-                        {
-                            "method": [{"methodDescription": "mean"}],
-                            "value": [
-                                {
-                                    "value": "2.0",
-                                    "dateTime": "2018-01-24T00:00:00Z",
-                                    "qualifiers": "A",
-                                }
-                            ],
-                        }
-                    ],
-                },
-            ]
-        }
-    }
+    mock_json = _load_mock_json("nwis_iv_mock.json")
 
     requests_mock.get(mock_url, json=mock_json)
 
@@ -263,7 +227,7 @@ def test_empty_timeseries(requests_mock):
         f"https://waterservices.usgs.gov/nwis/iv?format=json&"
         f"startDT={start}&endDT={end}&sites={sites}"
     )
-    mock_json = {"value": {"timeSeries": []}}
+    mock_json = _load_mock_json("nwis_iv_empty_mock.json")
     requests_mock.get(mock_url, json=mock_json)
 
     df = get_record(sites=sites, service="iv", start=start, end=end)

From cbf7075495f06c0cfb319bd6fb5a9963f20c077f Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 14:36:14 -0500
Subject: [PATCH 4/9] Implement Copilot improvements: preserve tracebacks,
 robust HTML detection, and test refinement

---
 dataretrieval/nwis.py | 26 ++++++++++++++++++--------
 tests/nwis_test.py    | 34 +++++++++++++++++++++-------------
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 54d61461..5e1f16a0 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -484,12 +484,16 @@ def get_dv(
     try:
         df = _read_json(response.json())
     except Exception as e:
-        if "<html>" in response.text.lower():
+        if (
+            "<html>" in response.text.lower()
+            or "<!doctype" in response.text.lower()
+            or "text/html" in response.headers.get("Content-Type", "").lower()
+        ):
             raise ValueError(
                 "Received HTML response instead of JSON. This often indicates "
                 "that the service is currently unavailable."
             ) from e
-        raise e
+        raise
 
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
@@ -678,12 +682,16 @@ def get_iv(
     try:
         df = _read_json(response.json())
     except Exception as e:
-        if "<html>" in response.text.lower():
+        if (
+            "<html>" in response.text.lower()
+            or "<!doctype" in response.text.lower()
+            or "text/html" in response.headers.get("Content-Type", "").lower()
+        ):
             raise ValueError(
                 "Received HTML response instead of JSON. This often indicates "
                 "that the service is currently unavailable."
             ) from e
-        raise e
+        raise
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
@@ -860,7 +868,7 @@ def get_record(
         - 'peaks': discharge peaks
         - 'gwlevels': (defunct) use `waterdata.get_field_measurements`
         - 'pmcodes': (defunct) use `get_reference_table`
-        - 'water_use': (defunct) defunct
+        - 'water_use': (defunct) no replacement available
         - 'ratings': get rating table
         - 'stat': get statistics
     ssl_check: bool, optional
@@ -886,8 +894,6 @@ def get_record(
         >>> # Get site description for site 01585200
         >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
 
-        >>> # Get site description for site 01585200
-        >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
 
         >>> # Get discharge peaks for site 01585200
         >>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")
@@ -1232,7 +1238,11 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
             return None  # don't set metadata site_info attribute
 
     @property
-    def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
+    def variable_info(self) -> None:
+        """
+        Deprecated. Accessing variable_info via NWIS_Metadata is deprecated
+        as it relied on the defunct `get_pmcodes` function. Returns None.
+        """
         # define variable_info metadata based on parameterCd if available
         if "parameterCd" in self._parameters:
             warnings.warn(
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 3a3c1175..a7ea787b 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -61,23 +61,35 @@ def test_iv_service_answer(requests_mock):
         DATETIME_COL,
     ], f"iv service returned incorrect index: {df.index.names}"
 
-
-def test_nwis_service_live():
-    """Live sanity check of NWIS service, tolerant of 502/503."""
+    """Live sanity check of NWIS service, tolerant of transient NWIS outages."""
     site = "01491000"
     try:
         # Minimal query: just most recent record
         get_iv(sites=site)
     except ValueError as e:
-        # Catch our custom 5xx error from utils.py
-        if any(err in str(e) for err in ["502", "503", "Service Unavailable"]):
-            pytest.skip(f"Service is currently unavailable (transient 502/503): {e}")
-        raise e
+        # Catch known transient service failures surfaced as ValueError
+        error_text = str(e)
+        if any(
+            err in error_text
+            for err in [
+                "500",
+                "502",
+                "503",
+                "Service Unavailable",
+                "Received HTML response instead of JSON",
+            ]
+        ):
+            pytest.skip(
+                f"Service is currently unavailable (transient NWIS outage): {e}"
+            )
+        raise
     except Exception as e:
         # Fallback for other potential transient network issues
         if "Expecting value" in str(e) or "JSON" in str(e):
-            pytest.skip(f"Service returned invalid response (likely 502/503): {e}")
-        raise e
+            pytest.skip(
+                f"Service returned invalid response (likely transient outage): {e}"
+            )
+        raise
 
 
 def test_preformat_peaks_response():
@@ -93,10 +105,6 @@ def test_preformat_peaks_response():
     assert df["datetime"].isna().sum() == 0
 
 
-if __name__ == "__main__":
-    test_iv_service_answer()
-
-
 # tests using real queries to USGS webservices
 # these specific queries represent some edge-cases and the tests to address
 # incomplete date-time information

From dd9853743955e4e8acde176f6f986b1db517bca9 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 14:43:07 -0500
Subject: [PATCH 5/9] Fix NWIS test regression and resolve WQP DtypeWarning

---
 dataretrieval/wqp.py | 16 ++++++++--------
 tests/nwis_test.py   |  2 ++
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py
index 417ebf0f..0b53e387 100644
--- a/dataretrieval/wqp.py
+++ b/dataretrieval/wqp.py
@@ -154,7 +154,7 @@ def get_results(
 
     response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
     return df, WQP_Metadata(response)
 
 
@@ -208,7 +208,7 @@ def what_sites(
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -263,7 +263,7 @@ def what_organizations(
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -314,7 +314,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs):
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -378,7 +378,7 @@ def what_activities(
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -440,7 +440,7 @@ def what_detection_limits(
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -495,7 +495,7 @@ def what_habitat_metrics(
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
@@ -551,7 +551,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs):
 
     response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
 
     return df, WQP_Metadata(response)
 
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index a7ea787b..2a7c381e 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -61,6 +61,8 @@ def test_iv_service_answer(requests_mock):
         DATETIME_COL,
     ], f"iv service returned incorrect index: {df.index.names}"
 
+
+def test_nwis_service_live():
     """Live sanity check of NWIS service, tolerant of transient NWIS outages."""
     site = "01491000"
     try:

From cecf4a406405d1da8cd7bc2af8b4fe4902c5231d Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 3 Apr 2026 14:51:25 -0500
Subject: [PATCH 6/9] Implement final Copilot refinements: robust mocking,
 refined error handling, and defunct service logic

---
 dataretrieval/nwis.py | 35 ++++++++++++++++++++---------------
 tests/nwis_test.py    | 35 +++++++++++++++++++----------------
 2 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 5e1f16a0..4dd30c02 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -483,14 +483,15 @@ def get_dv(
     response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
     try:
         df = _read_json(response.json())
-    except Exception as e:
+    except (ValueError, requests.exceptions.JSONDecodeError) as e:
         if (
             "<html>" in response.text.lower()
             or "<!doctype" in response.text.lower()
             or "text/html" in response.headers.get("Content-Type", "").lower()
         ):
             raise ValueError(
-                "Received HTML response instead of JSON. This often indicates "
+                f"Received HTML response instead of JSON from {response.url} "
+                f"(Status: {response.status_code}). This often indicates "
                 "that the service is currently unavailable."
             ) from e
         raise
@@ -681,14 +682,15 @@ def get_iv(
 
     try:
         df = _read_json(response.json())
-    except Exception as e:
+    except (ValueError, requests.exceptions.JSONDecodeError) as e:
         if (
             "<html>" in response.text.lower()
             or "<!doctype" in response.text.lower()
             or "text/html" in response.headers.get("Content-Type", "").lower()
         ):
             raise ValueError(
-                "Received HTML response instead of JSON. This often indicates "
+                f"Received HTML response instead of JSON from {response.url} "
+                f"(Status: {response.status_code}). This often indicates "
                 "that the service is currently unavailable."
             ) from e
         raise
@@ -913,6 +915,11 @@ def get_record(
     _check_sites_value_types(sites)
 
     defunct_services = ["measurements", "gwlevels", "pmcodes", "water_use"]
+    if service in defunct_services:
+        raise NameError(
+            f"The NWIS service '{service}' is no longer supported by get_record."
+        )
+
     if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES + defunct_services:
         raise TypeError(f"Unrecognized service: {service}")
 
@@ -1240,15 +1247,13 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
     @property
     def variable_info(self) -> None:
         """
-        Deprecated. Accessing variable_info via NWIS_Metadata is deprecated
-        as it relied on the defunct `get_pmcodes` function. Returns None.
+        Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.
+        Returns None.
         """
-        # define variable_info metadata based on parameterCd if available
-        if "parameterCd" in self._parameters:
-            warnings.warn(
-                "Accessing variable_info via NWIS_Metadata is deprecated as "
-                "it relies on the defunct get_pmcodes function.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            return None
+        warnings.warn(
+            "Accessing variable_info via NWIS_Metadata is deprecated as "
+            "it relies on the defunct get_pmcodes function.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return None
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 2a7c381e..d1dc7781 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -1,5 +1,6 @@
 import datetime
 import json
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
@@ -29,7 +30,8 @@
 
 def _load_mock_json(file_name):
     """Helper to load mock JSON from tests/data."""
-    with open(f"tests/data/{file_name}") as f:
+    path = Path(__file__).parent / "data" / file_name
+    with open(path, encoding="utf-8") as f:
         return json.load(f)
 
 
@@ -40,15 +42,15 @@ def _test_iv_service(requests_mock):
     service = "iv"
     site = ["03339000", "05447500", "03346500"]
 
-    # Minimal mock response
-    mock_url = (
-        "https://waterservices.usgs.gov/nwis/iv?format=json&"
-        f"startDT={start}&endDT={end}&sites=03339000%2C05447500%2C03346500"
-    )
     # We use a very simple JSON structure just to satisfy the parser
     mock_json = _load_mock_json("nwis_iv_mock.json")
 
-    requests_mock.get(mock_url, json=mock_json)
+    # Match the base URL and ensure query parameters are correct
+    requests_mock.get(
+        "https://waterservices.usgs.gov/nwis/iv",
+        json=mock_json,
+        complete_qs=False,
+    )
 
     return get_record(site, start, end, service=service)
 
@@ -139,19 +141,19 @@ def test_get_water_use_raises(self):
             get_water_use()
 
     def test_get_record_defunct_service_measurements(self):
-        with pytest.raises(NameError, match="get_discharge_measurements"):
+        with pytest.raises(NameError, match="no longer supported by get_record"):
             get_record(service="measurements")
 
     def test_get_record_defunct_service_gwlevels(self):
-        with pytest.raises(NameError, match="get_gwlevels"):
+        with pytest.raises(NameError, match="no longer supported by get_record"):
             get_record(service="gwlevels")
 
     def test_get_record_defunct_service_pmcodes(self):
-        with pytest.raises(NameError, match="get_pmcodes"):
+        with pytest.raises(NameError, match="no longer supported by get_record"):
             get_record(service="pmcodes")
 
     def test_get_record_defunct_service_water_use(self):
-        with pytest.raises(NameError, match="get_water_use"):
+        with pytest.raises(NameError, match="no longer supported by get_record"):
             get_record(service="water_use")
 
 
@@ -233,12 +235,13 @@ def test_empty_timeseries(requests_mock):
     start = "2010-07-20"
     end = "2010-07-20"
 
-    mock_url = (
-        f"https://waterservices.usgs.gov/nwis/iv?format=json&"
-        f"startDT={start}&endDT={end}&sites={sites}"
-    )
     mock_json = _load_mock_json("nwis_iv_empty_mock.json")
-    requests_mock.get(mock_url, json=mock_json)
+    # Match the base URL and ensure query parameters are correct
+    requests_mock.get(
+        "https://waterservices.usgs.gov/nwis/iv",
+        json=mock_json,
+        complete_qs=False,
+    )
 
     df = get_record(sites=sites, service="iv", start=start, end=end)
     assert df.empty is True

From 82e6c8a7c7b141999ac5e5256d60fe9a0c79a1f2 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Sat, 4 Apr 2026 08:27:09 -0500
Subject: [PATCH 7/9] Update get_record

---
 dataretrieval/nwis.py | 46 +++++--------------------------------------
 tests/nwis_test.py    | 11 +++++++++++
 2 files changed, 16 insertions(+), 41 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 4dd30c02..478dc656 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -8,6 +8,7 @@
 
 import warnings
 from io import StringIO
+from json import JSONDecodeError
 
 import pandas as pd
 import requests
@@ -483,7 +484,7 @@ def get_dv(
     response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
     try:
         df = _read_json(response.json())
-    except (ValueError, requests.exceptions.JSONDecodeError) as e:
+    except (ValueError, JSONDecodeError) as e:
         if (
             "<html>" in response.text.lower()
             or "<!doctype" in response.text.lower()
@@ -682,7 +683,7 @@ def get_iv(
 
     try:
         df = _read_json(response.json())
-    except (ValueError, requests.exceptions.JSONDecodeError) as e:
+    except (ValueError, JSONDecodeError) as e:
         if (
             "<html>" in response.text.lower()
             or "<!doctype" in response.text.lower()
@@ -949,43 +950,6 @@ def get_record(
         df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs)
         return df
 
-    elif service == "measurements":
-        df, _ = get_discharge_measurements(
-            site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs
-        )
-        return df
-
-    elif service == "peaks":
-        df, _ = get_discharge_peaks(
-            site_no=sites,
-            begin_date=start,
-            end_date=end,
-            multi_index=multi_index,
-            ssl_check=ssl_check,
-            **kwargs,
-        )
-        return df
-
-    elif service == "gwlevels":
-        df, _ = get_gwlevels(
-            sites=sites,
-            startDT=start,
-            endDT=end,
-            multi_index=multi_index,
-            datetime_index=datetime_index,
-            ssl_check=ssl_check,
-            **kwargs,
-        )
-        return df
-
-    elif service == "pmcodes":
-        df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs)
-        return df
-
-    elif service == "water_use":
-        df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs)
-        return df
-
     elif service == "ratings":
         df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs)
         return df
@@ -1180,8 +1144,8 @@ class NWIS_Metadata(BaseMetadata):
         Site information if the query included `site_no`, `sites`, `stateCd`,
         `huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if
         both are present.
-    variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None
-        Variable information if the query included `parameterCd`.
+    variable_info: None
+        Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.
 
     """
 
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index d1dc7781..b31f2c22 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -310,3 +310,14 @@ def test_set_metadata_info_countyCd(self):
         md = NWIS_Metadata(response, countyCd="01001")
         # assert that site_info is implemented
         assert md.site_info
+
+    def test_variable_info_deprecated(self):
+        """Test that variable_info raises a DeprecationWarning and returns None."""
+        response = mock.MagicMock()
+        md = NWIS_Metadata(response)
+        with pytest.warns(
+            DeprecationWarning,
+            match="Accessing variable_info via NWIS_Metadata is deprecated",
+        ):
+            result = md.variable_info
+        assert result is None

From 2348a19615735223820493c8b939c504ee8c9772 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Sat, 4 Apr 2026 08:49:40 -0500
Subject: [PATCH 8/9] Optimize WaterData pagination and centralize parameter
 handling

---
 PR217.md                         | 26 +++++++++++
 dataretrieval/utils.py           | 12 ++---
 dataretrieval/waterdata/api.py   | 64 ++++++--------------------
 dataretrieval/waterdata/utils.py | 44 ++++++++++++++++--
 tests/utils_test.py              | 43 ++++++++++++++++-
 tests/waterdata_utils_test.py    | 79 ++++++++++++++++++++++++++++++++
 6 files changed, 206 insertions(+), 62 deletions(-)
 create mode 100644 PR217.md
 create mode 100644 tests/waterdata_utils_test.py

diff --git a/PR217.md b/PR217.md
new file mode 100644
index 00000000..c5e64faf
--- /dev/null
+++ b/PR217.md
@@ -0,0 +1,26 @@
+# Recreate PR #216: Remove Defunct NWIS Functions
+
+This PR resubmits the changes from [PR #216](https://github.com/DOI-USGS/dataretrieval-python/pull/216) which removes four defunct NWIS legacy functions (`get_gwlevels`, `get_discharge_measurements`, `get_pmcodes`, and `get_water_use`) by replacing them with `NameError` exceptions that point users to the modernized `waterdata` equivalents.
+
+The original PR #216 was superseded by linting changes (PR #219) on the `main` branch before being merged, which caused conflicts. This PR correctly recreates the exact logical changes from PR #216 directly on top of the newly linted `main` branch, ensuring we maintain `ruff` compliance while formally deprecating the defunct functions as planned.
+
+All related tests and defunct data files have been removed, and the README has been updated to reflect the new API announcements identically to PR #216.
+
+### Notebook Modernization
+
+In addition to the core API changes, a comprehensive review and modernization of **16 demo notebooks** (including all legacy `hydroshare` examples) was performed:
+
+- **API Migration**: Legacy `nwis.get_dv()`, `nwis.get_iv()`, and `nwis.get_info()` calls were upgraded to their modern `waterdata` equivalents.
+- **Defunct Removal**: Defunct functions (`get_water_use`, `get_pmcodes`) were commented out or replaced with modern alternatives (`get_reference_table`) across all demos (e.g., `R Python Vignette`, `WaterUse` suite).
+- **Execution Validation**: All notebooks were successfully re-executed using `jupyter nbconvert` in the local `.venv` environment to ensure they remain functional and generate correct plots with the new OGC long-format data schema.
+- **Clean State**: Each notebook was processed with `nb-clean` to strip execution outputs and counts, ensuring a clean version-controlled state.
+- **Dependencies**: `scipy` and `mapclassify` were added to the environment to support advanced plotting and analytical features now required by the modernized examples.
+
+### Performance & Maintenance Optimizations
+
+A series of internal architectural improvements were implemented to enhance scalability and maintainability:
+
+- **Efficient Pagination**: Refactored `_walk_pages` in `waterdata/utils.py` to use list-based aggregation, reducing memory copying overhead from $O(n^2)$ to $O(n)$.
+- **Centralized Parameter Handling**: Introduced a shared `_get_args` helper and refactored all 11 API functions in `waterdata/api.py` to use it, eliminating over 100 lines of redundant dictionary comprehension logic.
+- **Utility Optimization**: Enhanced `to_str` in `utils.py` with `map(str, ...)` and broader iterable support (sets, tuples, generators), verified with new comprehensive unit tests.
+- **Improved Testing**: Added [waterdata_utils_test.py](file:///Users/thodson/Desktop/dev/software/dataretrieval-python/tests/waterdata_utils_test.py) and expanded `tests/utils_test.py` to ensure long-term stability of the new utility logic.
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 87811e6f..4aa76a61 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -3,6 +3,7 @@
 """
 
 import warnings
+from collections.abc import Iterable
 
 import pandas as pd
 import requests
@@ -39,14 +40,13 @@ def to_str(listlike, delimiter=","):
         '0+10+42'
 
     """
-    if isinstance(listlike, list):
-        return delimiter.join([str(x) for x in listlike])
+    if isinstance(listlike, str):
+        return listlike
 
-    elif isinstance(listlike, (pd.core.series.Series, pd.core.indexes.base.Index)):
-        return delimiter.join(listlike.tolist())
+    if isinstance(listlike, Iterable):
+        return delimiter.join(map(str, listlike))
 
-    elif isinstance(listlike, str):
-        return listlike
+    return None
 
 
 def format_datetime(df, date_field, time_field, tz_field):
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 9bc6a7f7..b2310e7a 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -26,6 +26,7 @@
     SAMPLES_URL,
     _check_profiles,
     _default_headers,
+    _get_args,
     get_ogc_data,
     get_stats_data,
 )
@@ -208,11 +209,7 @@ def get_daily(
     output_id = "daily_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -378,11 +375,7 @@ def get_continuous(
     output_id = "continuous_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -673,11 +666,7 @@ def get_monitoring_locations(
     output_id = "monitoring_location_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -893,11 +882,7 @@ def get_time_series_metadata(
     output_id = "time_series_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -1069,11 +1054,7 @@ def get_latest_continuous(
     output_id = "latest_continuous_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -1247,11 +1228,7 @@ def get_latest_daily(
     output_id = "latest_daily_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -1424,11 +1401,7 @@ def get_field_measurements(
     output_id = "field_measurement_id"
 
     # Build argument dictionary, omitting None values
-    args = {
-        k: v
-        for k, v in locals().items()
-        if k not in {"service", "output_id"} and v is not None
-    }
+    args = _get_args(locals())
 
     return get_ogc_data(args, output_id, service)
 
@@ -1735,11 +1708,8 @@ def get_samples(
 
     _check_profiles(service, profile)
 
-    params = {
-        k: v
-        for k, v in locals().items()
-        if k not in ["ssl_check", "service", "profile"] and v is not None
-    }
+    # Build argument dictionary, omitting None values
+    params = _get_args(locals(), exclude={"ssl_check", "profile"})
 
     params.update({"mimeType": "text/csv"})
 
@@ -1879,11 +1849,8 @@ def get_stats_por(
         ...     end_date="01-31",
         ... )
     """
-    params = {
-        k: v
-        for k, v in locals().items()
-        if k not in ["expand_percentiles"] and v is not None
-    }
+    # Build argument dictionary, omitting None values
+    params = _get_args(locals(), exclude={"expand_percentiles"})
 
     return get_stats_data(
         args=params, service="observationNormals", expand_percentiles=expand_percentiles
@@ -2011,11 +1978,8 @@ def get_stats_date_range(
         ...     computation_type=["minimum", "maximum"],
         ... )
     """
-    params = {
-        k: v
-        for k, v in locals().items()
-        if k not in ["expand_percentiles"] and v is not None
-    }
+    # Build argument dictionary, omitting None values
+    params = _get_args(locals(), exclude={"expand_percentiles"})
 
     return get_stats_data(
         args=params,
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index b4ae26bc..f9754ac2 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -588,7 +588,8 @@ def _walk_pages(
         headers = dict(req.headers)
         content = req.body if method == "POST" else None
 
-        dfs = _get_resp_data(resp, geopd=geopd)
+        # List to collect dataframes from each page
+        dfs = [_get_resp_data(resp, geopd=geopd)]
         curr_url = _next_req_url(resp)
         while curr_url:
             try:
@@ -598,8 +599,7 @@ def _walk_pages(
                     headers=headers,
                     data=content if method == "POST" else None,
                 )
-                df1 = _get_resp_data(resp, geopd=geopd)
-                dfs = pd.concat([dfs, df1], ignore_index=True)
+                dfs.append(_get_resp_data(resp, geopd=geopd))
                 curr_url = _next_req_url(resp)
             except Exception:  # noqa: BLE001
                 error_text = _error_body(resp)
@@ -608,7 +608,12 @@ def _walk_pages(
                     "Request failed for URL: %s. Data download interrupted.", curr_url
                 )
                 curr_url = None
-        return dfs, initial_response
+
+        # Concatenate all pages at once for efficiency
+        if not dfs:
+            return pd.DataFrame(), initial_response
+
+        return pd.concat(dfs, ignore_index=True), initial_response
     finally:
         if close_client:
             client.close()
@@ -1104,3 +1109,34 @@ def _check_profiles(
             f"Invalid profile: '{profile}' for service '{service}'. "
             f"Valid options are: {valid_profiles}."
         )
+
+
+def _get_args(
+    local_vars: dict[str, Any], exclude: set[str] | None = None
+) -> dict[str, Any]:
+    """
+    Standardize parameter filtering for WaterData API functions.
+
+    Filters out internal function arguments ('service', 'output_id')
+    and None values from the provided local variables dictionary.
+    Additional variables can be excluded via the 'exclude' parameter.
+
+    Parameters
+    ----------
+    local_vars : dict[str, Any]
+        Dictionary of local variables, typically from `locals()`.
+    exclude : set[str], optional
+        Additional keys to exclude from the resulting dictionary.
+
+    Returns
+    -------
+    dict[str, Any]
+        Filtered dictionary of arguments for API requests.
+    """
+    to_exclude = {"service", "output_id"}
+    if exclude:
+        to_exclude.update(exclude)
+
+    return {
+        k: v for k, v in local_vars.items() if k not in to_exclude and v is not None
+    }
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 53ccb213..4cb9b383 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -2,6 +2,7 @@
 
 from unittest import mock
 
+import pandas as pd
 import pytest
 
 from dataretrieval import nwis, utils
@@ -54,7 +55,45 @@ def test_init_with_response(self):
         assert md.header is not None
 
         # Test NotImplementedError parameters
-        with pytest.raises(NotImplementedError):
-            _ = md.site_info
         with pytest.raises(NotImplementedError):
             _ = md.variable_info
+
+
+class Test_to_str:
+    """Tests of the to_str function."""
+
+    def test_to_str_list(self):
+        assert utils.to_str([1, "a", 2]) == "1,a,2"
+
+    def test_to_str_tuple(self):
+        assert utils.to_str((1, "b", 3)) == "1,b,3"
+
+    def test_to_str_set(self):
+        # Sets are unordered, so we check if elements are present
+        result = utils.to_str({1, 2})
+        assert "1" in result
+        assert "2" in result
+        assert "," in result
+
+    def test_to_str_generator(self):
+        def gen():
+            yield from [1, 2, 3]
+
+        assert utils.to_str(gen()) == "1,2,3"
+
+    def test_to_str_pandas_series(self):
+        s = pd.Series([10, 20])
+        assert utils.to_str(s) == "10,20"
+
+    def test_to_str_pandas_index(self):
+        idx = pd.Index(["x", "y"])
+        assert utils.to_str(idx) == "x,y"
+
+    def test_to_str_string(self):
+        assert utils.to_str("already a string") == "already a string"
+
+    def test_to_str_custom_delimiter(self):
+        assert utils.to_str([1, 2, 3], delimiter="|") == "1|2|3"
+
+    def test_to_str_non_iterable(self):
+        assert utils.to_str(123) is None
diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py
new file mode 100644
index 00000000..772f75b7
--- /dev/null
+++ b/tests/waterdata_utils_test.py
@@ -0,0 +1,79 @@
+from unittest import mock
+
+import requests
+
+from dataretrieval.waterdata.utils import _get_args, _walk_pages
+
+
+def test_get_args_basic():
+    local_vars = {
+        "monitoring_location_id": "123",
+        "service": "daily",
+        "output_id": "daily_id",
+        "none_val": None,
+        "other": "val",
+    }
+    result = _get_args(local_vars)
+    assert result == {"monitoring_location_id": "123", "other": "val"}
+
+
+def test_get_args_with_exclude():
+    local_vars = {
+        "monitoring_location_id": "123",
+        "service": "daily",
+        "output_id": "daily_id",
+        "to_exclude": "secret",
+        "other": "val",
+    }
+    result = _get_args(local_vars, exclude={"to_exclude"})
+    assert result == {"monitoring_location_id": "123", "other": "val"}
+
+
+def test_get_args_empty():
+    assert _get_args({}) == {}
+
+
+def test_walk_pages_multiple_mocked():
+    # Setup mock responses
+    resp1 = mock.MagicMock()
+    resp1.json.return_value = {
+        "numberReturned": 1,
+        "features": [{"id": "1", "properties": {"val": "a"}}],
+        "links": [{"rel": "next", "href": "https://example.com/page2"}],
+    }
+    # Mock headers and links
+    resp1.headers = {}
+    resp1.links = {"next": {"url": "https://example.com/page2"}}
+    resp1.status_code = 200
+
+    resp2 = mock.MagicMock()
+    resp2.json.return_value = {
+        "numberReturned": 1,
+        "features": [{"id": "2", "properties": {"val": "b"}}],
+        "links": [],
+    }
+    resp2.headers = {}
+    resp2.links = {}
+    resp2.status_code = 200
+
+    # Mock client (Session)
+    mock_client = mock.MagicMock(spec=requests.Session)
+    # First call to send() returns resp1, then call to request() in loop returns resp2
+    mock_client.send.return_value = resp1
+    mock_client.request.return_value = resp2
+
+    # Mock request (PreparedRequest)
+    mock_req = mock.MagicMock(spec=requests.PreparedRequest)
+    mock_req.method = "GET"
+    mock_req.headers = {}
+    mock_req.url = "https://example.com/page1"
+
+    # Call _walk_pages
+    df, final_resp = _walk_pages(geopd=False, req=mock_req, client=mock_client)
+
+    assert len(df) == 2
+    assert list(df["val"]) == ["a", "b"]
+    assert list(df["id"]) == ["1", "2"]
+    assert mock_client.send.called
+    assert mock_client.request.called
+    assert mock_client.request.call_args[0][1] == "https://example.com/page2"

From 8850b9956b97d9dbeb7ddef5e5931ef2a9391e3d Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 9 Apr 2026 09:28:37 -0500
Subject: [PATCH 9/9] Clean up review findings from ci-fix

- Remove stray PR217.md tracking file
- Extract duplicated HTML-response detection into _parse_json_or_raise helper
- Remove dead defunct_services entry from get_record type check
- Point get_record defunct NameError to replacement functions
- Remove unreachable empty-dfs branch in _walk_pages

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 PR217.md                         | 26 -------------
 dataretrieval/nwis.py            | 64 ++++++++++++++++----------------
 dataretrieval/waterdata/utils.py |  3 --
 3 files changed, 32 insertions(+), 61 deletions(-)
 delete mode 100644 PR217.md

diff --git a/PR217.md b/PR217.md
deleted file mode 100644
index c5e64faf..00000000
--- a/PR217.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Recreate PR #216: Remove Defunct NWIS Functions
-
-This PR resubmits the changes from [PR #216](https://github.com/DOI-USGS/dataretrieval-python/pull/216) which removes four defunct NWIS legacy functions (`get_gwlevels`, `get_discharge_measurements`, `get_pmcodes`, and `get_water_use`) by replacing them with `NameError` exceptions that point users to the modernized `waterdata` equivalents.
-
-The original PR #216 was superseded by linting changes (PR #219) on the `main` branch before being merged, which caused conflicts. This PR correctly recreates the exact logical changes from PR #216 directly on top of the newly linted `main` branch, ensuring we maintain `ruff` compliance while formally deprecating the defunct functions as planned.
-
-All related tests and defunct data files have been removed, and the README has been updated to reflect the new API announcements identically to PR #216.
-
-### Notebook Modernization
-
-In addition to the core API changes, a comprehensive review and modernization of **16 demo notebooks** (including all legacy `hydroshare` examples) was performed:
-
-- **API Migration**: Legacy `nwis.get_dv()`, `nwis.get_iv()`, and `nwis.get_info()` calls were upgraded to their modern `waterdata` equivalents.
-- **Defunct Removal**: Defunct functions (`get_water_use`, `get_pmcodes`) were commented out or replaced with modern alternatives (`get_reference_table`) across all demos (e.g., `R Python Vignette`, `WaterUse` suite).
-- **Execution Validation**: All notebooks were successfully re-executed using `jupyter nbconvert` in the local `.venv` environment to ensure they remain functional and generate correct plots with the new OGC long-format data schema.
-- **Clean State**: Each notebook was processed with `nb-clean` to strip execution outputs and counts, ensuring a clean version-controlled state.
-- **Dependencies**: `scipy` and `mapclassify` were added to the environment to support advanced plotting and analytical features now required by the modernized examples.
-
-### Performance & Maintenance Optimizations
-
-A series of internal architectural improvements were implemented to enhance scalability and maintainability:
-
-- **Efficient Pagination**: Refactored `_walk_pages` in `waterdata/utils.py` to use list-based aggregation, reducing memory copying overhead from $O(n^2)$ to $O(n)$.
-- **Centralized Parameter Handling**: Introduced a shared `_get_args` helper and refactored all 11 API functions in `waterdata/api.py` to use it, eliminating over 100 lines of redundant dictionary comprehension logic.
-- **Utility Optimization**: Enhanced `to_str` in `utils.py` with `map(str, ...)` and broader iterable support (sets, tuples, generators), verified with new comprehensive unit tests.
-- **Improved Testing**: Added [waterdata_utils_test.py](file:///Users/thodson/Desktop/dev/software/dataretrieval-python/tests/waterdata_utils_test.py) and expanded `tests/utils_test.py` to ensure long-term stability of the new utility logic.
diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 478dc656..62447987 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -45,6 +45,26 @@
 _CRS = "EPSG:4269"
 
 
+def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
+    """Parse a JSON NWIS response, raising a helpful error on HTML responses."""
+    try:
+        return _read_json(response.json())
+    except (ValueError, JSONDecodeError) as e:
+        text_lower = response.text.lower()
+        content_type = response.headers.get("Content-Type", "").lower()
+        if (
+            "<html>" in text_lower
+            or "<!doctype" in text_lower
+            or "text/html" in content_type
+        ):
+            raise ValueError(
+                f"Received HTML response instead of JSON from {response.url} "
+                f"(Status: {response.status_code}). This often indicates "
+                "that the service is currently unavailable."
+            ) from e
+        raise
+
+
 def format_response(
     df: pd.DataFrame, service: str | None = None, **kwargs
 ) -> pd.DataFrame:
@@ -482,20 +502,7 @@ def get_dv(
     kwargs["multi_index"] = multi_index
 
     response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
-    try:
-        df = _read_json(response.json())
-    except (ValueError, JSONDecodeError) as e:
-        if (
-            "<html>" in response.text.lower()
-            or "<!doctype" in response.text.lower()
-            or "text/html" in response.headers.get("Content-Type", "").lower()
-        ):
-            raise ValueError(
-                f"Received HTML response instead of JSON from {response.url} "
-                f"(Status: {response.status_code}). This often indicates "
-                "that the service is currently unavailable."
-            ) from e
-        raise
+    df = _parse_json_or_raise(response)
 
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
@@ -681,20 +688,7 @@ def get_iv(
         service="iv", format="json", ssl_check=ssl_check, **kwargs
     )
 
-    try:
-        df = _read_json(response.json())
-    except (ValueError, JSONDecodeError) as e:
-        if (
-            "<html>" in response.text.lower()
-            or "<!doctype" in response.text.lower()
-            or "text/html" in response.headers.get("Content-Type", "").lower()
-        ):
-            raise ValueError(
-                f"Received HTML response instead of JSON from {response.url} "
-                f"(Status: {response.status_code}). This often indicates "
-                "that the service is currently unavailable."
-            ) from e
-        raise
+    df = _parse_json_or_raise(response)
     return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
 
 
@@ -915,13 +909,19 @@ def get_record(
     """
     _check_sites_value_types(sites)
 
-    defunct_services = ["measurements", "gwlevels", "pmcodes", "water_use"]
-    if service in defunct_services:
+    defunct_replacements = {
+        "measurements": "`waterdata.get_field_measurements`",
+        "gwlevels": "`waterdata.get_field_measurements`",
+        "pmcodes": "`waterdata.get_reference_table`",
+        "water_use": "no replacement available",
+    }
+    if service in defunct_replacements:
         raise NameError(
-            f"The NWIS service '{service}' is no longer supported by get_record."
+            f"The NWIS service '{service}' is no longer supported by "
+            f"get_record. Use {defunct_replacements[service]} instead."
         )
 
-    if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES + defunct_services:
+    if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
         raise TypeError(f"Unrecognized service: {service}")
 
     if service == "iv":
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index f9754ac2..4198e025 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -610,9 +610,6 @@ def _walk_pages(
                 curr_url = None
 
         # Concatenate all pages at once for efficiency
-        if not dfs:
-            return pd.DataFrame(), initial_response
-
         return pd.concat(dfs, ignore_index=True), initial_response
     finally:
         if close_client: