diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index f566b19d..62447987 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -8,6 +8,7 @@ import warnings from io import StringIO +from json import JSONDecodeError import pandas as pd import requests @@ -44,6 +45,26 @@ _CRS = "EPSG:4269" +def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame: + """Parse a JSON NWIS response, raising a helpful error on HTML responses.""" + try: + return _read_json(response.json()) + except (ValueError, JSONDecodeError) as e: + text_lower = response.text.lower() + content_type = response.headers.get("Content-Type", "").lower() + if ( + "" in text_lower + or " pd.DataFrame: @@ -481,7 +502,7 @@ def get_dv( kwargs["multi_index"] = multi_index response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs) - df = _read_json(response.json()) + df = _parse_json_or_raise(response) return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) @@ -667,7 +688,7 @@ def get_iv( service="iv", format="json", ssl_check=ssl_check, **kwargs ) - df = _read_json(response.json()) + df = _parse_json_or_raise(response) return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) @@ -840,11 +861,11 @@ def get_record( - 'iv' : instantaneous data - 'dv' : daily mean data - 'site' : site description - - 'measurements' : discharge measurements + - 'measurements' : (defunct) use `waterdata.get_field_measurements` - 'peaks': discharge peaks - - 'gwlevels': groundwater levels - - 'pmcodes': get parameter codes - - 'water_use': get water use data + - 'gwlevels': (defunct) use `waterdata.get_field_measurements` + - 'pmcodes': (defunct) use `get_reference_table` + - 'water_use': (defunct) no replacement available - 'ratings': get rating table - 'stat': get statistics ssl_check: bool, optional @@ -870,29 +891,10 @@ def get_record( >>> # Get site description for site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", 
service="site") - >>> # Get discharge measurements for site 01585200 - >>> df = dataretrieval.nwis.get_record( - ... sites="01585200", service="measurements" - ... ) >>> # Get discharge peaks for site 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks") - >>> # Get latest groundwater level for site 434400121275801 - >>> df = dataretrieval.nwis.get_record( - ... sites="434400121275801", service="gwlevels" - ... ) - - >>> # Get information about the discharge parameter code - >>> df = dataretrieval.nwis.get_record( - ... service="pmcodes", parameterCd="00060" - ... ) - - >>> # Get water use data for livestock nationally in 2010 - >>> df = dataretrieval.nwis.get_record( - ... service="water_use", years="2010", categories="L" - ... ) - >>> # Get rating table for USGS streamgage 01585200 >>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings") @@ -907,6 +909,18 @@ def get_record( """ _check_sites_value_types(sites) + defunct_replacements = { + "measurements": "Use `waterdata.get_field_measurements` instead.", + "gwlevels": "Use `waterdata.get_field_measurements` instead.", + "pmcodes": "Use `waterdata.get_reference_table` instead.", + "water_use": "No replacement is available.", + } + if service in defunct_replacements: + raise NameError( + f"The NWIS service '{service}' is no longer supported by " + f"get_record. {defunct_replacements[service]}" 
+ ) + if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES: raise TypeError(f"Unrecognized service: {service}") @@ -936,43 +950,17 @@ def get_record( df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) return df - - elif service == "measurements": - df, _ = get_discharge_measurements( - site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs - ) - return df elif service == "peaks": df, _ = get_discharge_peaks( site_no=sites, begin_date=start, end_date=end, multi_index=multi_index, ssl_check=ssl_check, **kwargs, ) return df - elif service == "gwlevels": - df, _ = get_gwlevels( - sites=sites, - startDT=start, - endDT=end, - multi_index=multi_index, - datetime_index=datetime_index, - ssl_check=ssl_check, - **kwargs, - ) - return df - - elif service == "pmcodes": - df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs) - return df - - elif service == "water_use": - df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs) - return df - elif service == "ratings": df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs) return df @@ -1167,8 +1155,8 @@ class NWIS_Metadata(BaseMetadata): Site information if the query included `site_no`, `sites`, `stateCd`, `huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if both are present. - variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None - Variable information if the query included `parameterCd`. + variable_info: None + Deprecated. Accessing variable_info via NWIS_Metadata is deprecated. """ @@ -1232,7 +1220,15 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None: return None # don't set metadata site_info attribute @property - def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None: - # define variable_info metadata based on parameterCd if available - if "parameterCd" in self._parameters: - return get_pmcodes(parameterCd=self._parameters["parameterCd"]) + def variable_info(self) -> None: + """ + Deprecated. 
Accessing variable_info via NWIS_Metadata is deprecated. + Returns None. + """ + warnings.warn( + "Accessing variable_info via NWIS_Metadata is deprecated as " + "it relies on the defunct get_pmcodes function.", + DeprecationWarning, + stacklevel=2, + ) + return None diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 338238b5..4aa76a61 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -3,6 +3,7 @@ """ import warnings +from collections.abc import Iterable import pandas as pd import requests @@ -39,14 +40,13 @@ def to_str(listlike, delimiter=","): '0+10+42' """ - if isinstance(listlike, list): - return delimiter.join([str(x) for x in listlike]) + if isinstance(listlike, str): + return listlike - elif isinstance(listlike, (pd.core.series.Series, pd.core.indexes.base.Index)): - return delimiter.join(listlike.tolist()) + if isinstance(listlike, Iterable): + return delimiter.join(map(str, listlike)) - elif isinstance(listlike, str): - return listlike + return None def format_datetime(df, date_field, time_field, tz_field): @@ -212,6 +212,11 @@ def query(url, payload, delimiter=",", ssl_check=True): + f"API response reason: {_reason}. Pseudo-code example of how to " + f"split your query: \n {_example}" ) + elif response.status_code in [500, 502, 503]: + raise ValueError( + f"Service Unavailable: {response.status_code} {response.reason}. " + + f"The service at {response.url} may be down or experiencing issues." 
+ ) if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index 9bc6a7f7..b2310e7a 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -26,6 +26,7 @@ SAMPLES_URL, _check_profiles, _default_headers, + _get_args, get_ogc_data, get_stats_data, ) @@ -208,11 +209,7 @@ def get_daily( output_id = "daily_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -378,11 +375,7 @@ def get_continuous( output_id = "continuous_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -673,11 +666,7 @@ def get_monitoring_locations( output_id = "monitoring_location_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -893,11 +882,7 @@ def get_time_series_metadata( output_id = "time_series_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1069,11 +1054,7 @@ def get_latest_continuous( output_id = "latest_continuous_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1247,11 +1228,7 @@ def get_latest_daily( output_id = 
"latest_daily_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1424,11 +1401,7 @@ def get_field_measurements( output_id = "field_measurement_id" # Build argument dictionary, omitting None values - args = { - k: v - for k, v in locals().items() - if k not in {"service", "output_id"} and v is not None - } + args = _get_args(locals()) return get_ogc_data(args, output_id, service) @@ -1735,11 +1708,8 @@ def get_samples( _check_profiles(service, profile) - params = { - k: v - for k, v in locals().items() - if k not in ["ssl_check", "service", "profile"] and v is not None - } + # Build argument dictionary, omitting None values + params = _get_args(locals(), exclude={"ssl_check", "profile"}) params.update({"mimeType": "text/csv"}) @@ -1879,11 +1849,8 @@ def get_stats_por( ... end_date="01-31", ... ) """ - params = { - k: v - for k, v in locals().items() - if k not in ["expand_percentiles"] and v is not None - } + # Build argument dictionary, omitting None values + params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( args=params, service="observationNormals", expand_percentiles=expand_percentiles @@ -2011,11 +1978,8 @@ def get_stats_date_range( ... computation_type=["minimum", "maximum"], ... 
) """ - params = { - k: v - for k, v in locals().items() - if k not in ["expand_percentiles"] and v is not None - } + # Build argument dictionary, omitting None values + params = _get_args(locals(), exclude={"expand_percentiles"}) return get_stats_data( args=params, diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index b4ae26bc..4198e025 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -588,7 +588,8 @@ def _walk_pages( headers = dict(req.headers) content = req.body if method == "POST" else None - dfs = _get_resp_data(resp, geopd=geopd) + # List to collect dataframes from each page + dfs = [_get_resp_data(resp, geopd=geopd)] curr_url = _next_req_url(resp) while curr_url: try: @@ -598,8 +599,7 @@ def _walk_pages( headers=headers, data=content if method == "POST" else None, ) - df1 = _get_resp_data(resp, geopd=geopd) - dfs = pd.concat([dfs, df1], ignore_index=True) + dfs.append(_get_resp_data(resp, geopd=geopd)) curr_url = _next_req_url(resp) except Exception: # noqa: BLE001 error_text = _error_body(resp) @@ -608,7 +608,9 @@ def _walk_pages( "Request failed for URL: %s. Data download interrupted.", curr_url ) curr_url = None - return dfs, initial_response + + # Concatenate all pages at once for efficiency + return pd.concat(dfs, ignore_index=True), initial_response finally: if close_client: client.close() @@ -1104,3 +1106,34 @@ def _check_profiles( f"Invalid profile: '{profile}' for service '{service}'. " f"Valid options are: {valid_profiles}." ) + + +def _get_args( + local_vars: dict[str, Any], exclude: set[str] | None = None +) -> dict[str, Any]: + """ + Standardize parameter filtering for WaterData API functions. + + Filters out internal function arguments ('service', 'output_id') + and None values from the provided local variables dictionary. + Additional variables can be excluded via the 'exclude' parameter. 
+ + Parameters + ---------- + local_vars : dict[str, Any] + Dictionary of local variables, typically from `locals()`. + exclude : set[str], optional + Additional keys to exclude from the resulting dictionary. + + Returns + ------- + dict[str, Any] + Filtered dictionary of arguments for API requests. + """ + to_exclude = {"service", "output_id"} + if exclude: + to_exclude.update(exclude) + + return { + k: v for k, v in local_vars.items() if k not in to_exclude and v is not None + } diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 417ebf0f..0b53e387 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -154,7 +154,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -208,7 +208,7 @@ def what_sites( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -263,7 +263,7 @@ def what_organizations( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -314,7 +314,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -378,7 +378,7 @@ def what_activities( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = 
pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -440,7 +440,7 @@ def what_detection_limits( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -495,7 +495,7 @@ def what_habitat_metrics( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) @@ -551,7 +551,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) return df, WQP_Metadata(response) diff --git a/tests/data/nwis_iv_empty_mock.json b/tests/data/nwis_iv_empty_mock.json new file mode 100644 index 00000000..7b69d6d5 --- /dev/null +++ b/tests/data/nwis_iv_empty_mock.json @@ -0,0 +1,5 @@ +{ + "value": { + "timeSeries": [] + } +} \ No newline at end of file diff --git a/tests/data/nwis_iv_mock.json b/tests/data/nwis_iv_mock.json new file mode 100644 index 00000000..5c84f9f8 --- /dev/null +++ b/tests/data/nwis_iv_mock.json @@ -0,0 +1,84 @@ +{ + "value": { + "timeSeries": [ + { + "sourceInfo": { + "siteCode": [ + { + "value": "03339000" + } + ] + }, + "variable": { + "variableCode": [ + { + "value": "00060" + } + ], + "options": { + "option": [ + { + "value": "mean" + } + ] + } + }, + "values": [ + { + "method": [ + { + "methodDescription": "mean" + } + ], + "value": [ + { + "value": "1.0", + "dateTime": "2018-01-24T00:00:00Z", + "qualifiers": "A" + } + ] + } + ] + }, + { + "sourceInfo": { + "siteCode": [ + { + "value": "05447500" + } + ] + }, + "variable": { 
+ "variableCode": [ + { + "value": "00060" + } + ], + "options": { + "option": [ + { + "value": "mean" + } + ] + } + }, + "values": [ + { + "method": [ + { + "methodDescription": "mean" + } + ], + "value": [ + { + "value": "2.0", + "dateTime": "2018-01-24T00:00:00Z", + "qualifiers": "A" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/tests/nwis_test.py b/tests/nwis_test.py index c0f4d4b2..b31f2c22 100644 --- a/tests/nwis_test.py +++ b/tests/nwis_test.py @@ -1,4 +1,6 @@ import datetime +import json +from pathlib import Path from unittest import mock import numpy as np @@ -7,9 +9,14 @@ from dataretrieval.nwis import ( NWIS_Metadata, + get_discharge_measurements, + get_gwlevels, get_info, get_iv, + get_pmcodes, + get_qwdata, get_record, + get_water_use, preformat_peaks_response, what_sites, ) @@ -21,17 +28,35 @@ SITENO_COL = "site_no" -def test_iv_service(): - """Unit test of instantaneous value service""" +def _load_mock_json(file_name): + """Helper to load mock JSON from tests/data.""" + path = Path(__file__).parent / "data" / file_name + with open(path, encoding="utf-8") as f: + return json.load(f) + + +def _test_iv_service(requests_mock): + """Mocked test of instantaneous value service""" start = START_DATE end = END_DATE service = "iv" site = ["03339000", "05447500", "03346500"] + + # We use a very simple JSON structure just to satisfy the parser + mock_json = _load_mock_json("nwis_iv_mock.json") + + # Match on the base URL only; query parameters are not checked + requests_mock.get( + "https://waterservices.usgs.gov/nwis/iv", + json=mock_json, + complete_qs=False, + ) + return get_record(site, start, end, service=service) -def test_iv_service_answer(): - df = test_iv_service() +def test_iv_service_answer(requests_mock): + df = _test_iv_service(requests_mock) # check multiindex function assert df.index.names == [ SITENO_COL, @@ -39,6 +64,38 @@ def test_iv_service_answer(): ], f"iv service returned incorrect index: {df.index.names}" +def 
test_nwis_service_live(): + """Live sanity check of NWIS service, tolerant of transient NWIS outages.""" + site = "01491000" + try: + # Minimal query: just most recent record + get_iv(sites=site) + except ValueError as e: + # Catch known transient service failures surfaced as ValueError + error_text = str(e) + if any( + err in error_text + for err in [ + "500", + "502", + "503", + "Service Unavailable", + "Received HTML response instead of JSON", + ] + ): + pytest.skip( + f"Service is currently unavailable (transient NWIS outage): {e}" + ) + raise + except Exception as e: + # Fallback for other potential transient network issues + if "Expecting value" in str(e) or "JSON" in str(e): + pytest.skip( + f"Service returned invalid response (likely transient outage): {e}" + ) + raise + + def test_preformat_peaks_response(): # make a data frame with a "peak_dt" datetime column # it will have some nan and none values @@ -52,91 +109,52 @@ def test_preformat_peaks_response(): assert df["datetime"].isna().sum() == 0 -if __name__ == "__main__": - test_iv_service_answer() - - # tests using real queries to USGS webservices # these specific queries represent some edge-cases and the tests to address # incomplete date-time information -@pytest.mark.xfail(reason="Modern service does not return incomplete dates.") -def test_inc_date_01(): - """Test based on GitHub Issue #47 - lack of timestamp for measurement.""" - site = "403451073585601" - # make call expecting a warning to be thrown due to incomplete dates - with pytest.warns(UserWarning) as record: - df = get_record(site, "1980-01-01", "1990-01-01", service="gwlevels") - - if len(df) == 0: - pytest.skip(f"Site {site} returned no data.") - - assert len(record) > 0 - # assert that there are indeed incomplete dates - assert pd.isna(df.index).any() - # assert that the datetime index is there - assert df.index.name == "datetime" - # make call without defining a datetime index and check that it isn't there - df2 = get_record( - site, 
"1980-01-01", "1990-01-01", service="gwlevels", datetime_index=False - ) - # assert shape of both dataframes is the same (contain the same data) - assert df.shape == df2.shape - # assert that the datetime index is not there - assert df2.index.name != "datetime" - - -@pytest.mark.xfail(reason="Modern service does not return incomplete dates.") -def test_inc_date_02(): - """Test based on GitHub Issue #47 - lack of month, day, or time.""" - site = "180049066381200" - # make call expecting a warning to be thrown due to incomplete dates - with pytest.warns(UserWarning) as record: - df = get_record(site, "1900-01-01", "2013-01-01", service="gwlevels") - - if len(df) == 0: - pytest.skip(f"Site {site} returned no data.") - - assert len(record) > 0 - # assert that there are indeed incomplete dates - assert pd.isna(df.index).any() - # assert that the datetime index is there - assert df.index.name == "datetime" - # make call without defining a datetime index and check that it isn't there - df2 = get_record( - site, "1900-01-01", "2013-01-01", service="gwlevels", datetime_index=False - ) - # assert shape of both dataframes is the same (contain the same data) - assert df.shape == df2.shape - # assert that the datetime index is not there - assert df2.index.name != "datetime" - - -@pytest.mark.xfail(reason="Modern service does not return incomplete dates.") -def test_inc_date_03(): - """Test based on GitHub Issue #47 - lack of day, and times.""" - site = "290000095192602" - # make call expecting a warning to be thrown due to incomplete dates - with pytest.warns(UserWarning) as record: - df = get_record(site, "1975-01-01", "2000-01-01", service="gwlevels") - - if len(df) == 0: - pytest.skip(f"Site {site} returned no data.") - - assert len(record) > 0 - # assert that there are indeed incomplete dates - assert pd.isna(df.index).any() - # assert that the datetime index is there - assert df.index.name == "datetime" - # make call without defining a datetime index and check that it 
isn't there - df2 = get_record( - site, "1975-01-01", "2000-01-01", service="gwlevels", datetime_index=False - ) - # assert shape of both dataframes is the same (contain the same data) - assert df.shape == df2.shape - # assert that the datetime index is not there - assert df2.index.name != "datetime" +# Removed defunct gwlevels tests. + + +class TestDefunct: + """Verify that defunct functions raise NameError.""" + + def test_get_qwdata_raises(self): + with pytest.raises(NameError, match="get_qwdata"): + get_qwdata() + + def test_get_discharge_measurements_raises(self): + with pytest.raises(NameError, match="get_discharge_measurements"): + get_discharge_measurements() + + def test_get_gwlevels_raises(self): + with pytest.raises(NameError, match="get_gwlevels"): + get_gwlevels() + + def test_get_pmcodes_raises(self): + with pytest.raises(NameError, match="get_pmcodes"): + get_pmcodes() + + def test_get_water_use_raises(self): + with pytest.raises(NameError, match="get_water_use"): + get_water_use() + + def test_get_record_defunct_service_measurements(self): + with pytest.raises(NameError, match="no longer supported by get_record"): + get_record(service="measurements") + + def test_get_record_defunct_service_gwlevels(self): + with pytest.raises(NameError, match="no longer supported by get_record"): + get_record(service="gwlevels") + + def test_get_record_defunct_service_pmcodes(self): + with pytest.raises(NameError, match="no longer supported by get_record"): + get_record(service="pmcodes") + + def test_get_record_defunct_service_water_use(self): + with pytest.raises(NameError, match="no longer supported by get_record"): + get_record(service="water_use") class TestTZ: @@ -211,11 +229,21 @@ def test_expandedrdb_get_info(self): assert "count_nu" not in data.columns -def test_empty_timeseries(): +def test_empty_timeseries(requests_mock): """Test based on empty case from GitHub Issue #26.""" - df = get_record( - sites="011277906", service="iv", start="2010-07-20", 
end="2010-07-20" + sites = "011277906" + start = "2010-07-20" + end = "2010-07-20" + + mock_json = _load_mock_json("nwis_iv_empty_mock.json") + # Match on the base URL only; query parameters are not checked + requests_mock.get( + "https://waterservices.usgs.gov/nwis/iv", + json=mock_json, + complete_qs=False, ) + + df = get_record(sites=sites, service="iv", start=start, end=end) assert df.empty is True @@ -282,3 +310,14 @@ def test_set_metadata_info_countyCd(self): md = NWIS_Metadata(response, countyCd="01001") # assert that site_info is implemented assert md.site_info + + def test_variable_info_deprecated(self): + """Test that variable_info emits a DeprecationWarning and returns None.""" + response = mock.MagicMock() + md = NWIS_Metadata(response) + with pytest.warns( + DeprecationWarning, + match="Accessing variable_info via NWIS_Metadata is deprecated", + ): + result = md.variable_info + assert result is None diff --git a/tests/utils_test.py b/tests/utils_test.py index 53ccb213..4cb9b383 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -2,6 +2,7 @@ from unittest import mock +import pandas as pd import pytest from dataretrieval import nwis, utils @@ -54,7 +55,45 @@ def test_init_with_response(self): assert md.header is not None # Test NotImplementedError parameters - with pytest.raises(NotImplementedError): - _ = md.site_info with pytest.raises(NotImplementedError): _ = md.variable_info + + +class Test_to_str: + """Tests of the to_str function.""" + + def test_to_str_list(self): + assert utils.to_str([1, "a", 2]) == "1,a,2" + + def test_to_str_tuple(self): + assert utils.to_str((1, "b", 3)) == "1,b,3" + + def test_to_str_set(self): + # Sets are unordered, so we check if elements are present + result = utils.to_str({1, 2}) + assert "1" in result + assert "2" in result + assert "," in result + + def test_to_str_generator(self): + def gen(): + yield from [1, 2, 3] + + assert utils.to_str(gen()) == "1,2,3" + + def test_to_str_pandas_series(self): + s = 
pd.Series([10, 20]) + assert utils.to_str(s) == "10,20" + + def test_to_str_pandas_index(self): + idx = pd.Index(["x", "y"]) + assert utils.to_str(idx) == "x,y" + + def test_to_str_string(self): + assert utils.to_str("already a string") == "already a string" + + def test_to_str_custom_delimiter(self): + assert utils.to_str([1, 2, 3], delimiter="|") == "1|2|3" + + def test_to_str_non_iterable(self): + assert utils.to_str(123) is None diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py new file mode 100644 index 00000000..772f75b7 --- /dev/null +++ b/tests/waterdata_utils_test.py @@ -0,0 +1,79 @@ +from unittest import mock + +import requests + +from dataretrieval.waterdata.utils import _get_args, _walk_pages + + +def test_get_args_basic(): + local_vars = { + "monitoring_location_id": "123", + "service": "daily", + "output_id": "daily_id", + "none_val": None, + "other": "val", + } + result = _get_args(local_vars) + assert result == {"monitoring_location_id": "123", "other": "val"} + + +def test_get_args_with_exclude(): + local_vars = { + "monitoring_location_id": "123", + "service": "daily", + "output_id": "daily_id", + "to_exclude": "secret", + "other": "val", + } + result = _get_args(local_vars, exclude={"to_exclude"}) + assert result == {"monitoring_location_id": "123", "other": "val"} + + +def test_get_args_empty(): + assert _get_args({}) == {} + + +def test_walk_pages_multiple_mocked(): + # Setup mock responses + resp1 = mock.MagicMock() + resp1.json.return_value = { + "numberReturned": 1, + "features": [{"id": "1", "properties": {"val": "a"}}], + "links": [{"rel": "next", "href": "https://example.com/page2"}], + } + # Mock headers and links + resp1.headers = {} + resp1.links = {"next": {"url": "https://example.com/page2"}} + resp1.status_code = 200 + + resp2 = mock.MagicMock() + resp2.json.return_value = { + "numberReturned": 1, + "features": [{"id": "2", "properties": {"val": "b"}}], + "links": [], + } + resp2.headers = {} + resp2.links = 
{} + resp2.status_code = 200 + + # Mock client (Session) + mock_client = mock.MagicMock(spec=requests.Session) + # First call to send() returns resp1, then call to request() in loop returns resp2 + mock_client.send.return_value = resp1 + mock_client.request.return_value = resp2 + + # Mock request (PreparedRequest) + mock_req = mock.MagicMock(spec=requests.PreparedRequest) + mock_req.method = "GET" + mock_req.headers = {} + mock_req.url = "https://example.com/page1" + + # Call _walk_pages + df, final_resp = _walk_pages(geopd=False, req=mock_req, client=mock_client) + + assert len(df) == 2 + assert list(df["val"]) == ["a", "b"] + assert list(df["id"]) == ["1", "2"] + assert mock_client.send.called + assert mock_client.request.called + assert mock_client.request.call_args[0][1] == "https://example.com/page2"