diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 24e1737e..b8c95145 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -12,6 +12,7 @@ from __future__ import annotations import warnings +from functools import cached_property from io import StringIO from typing import TYPE_CHECKING @@ -22,6 +23,10 @@ if TYPE_CHECKING: from pandas import DataFrame +# WQP-native site key first, then legacy NWIS-style aliases. WQP_Metadata.site_info +# walks these in order and forwards whichever matched as what_sites' `siteid`. +_SITE_KEYS = ("siteid", "sites", "site", "site_no") + result_profiles_wqx3 = ["basicPhysChem", "fullPhysChem", "narrow"] result_profiles_legacy = ["biological", "narrowResult", "resultPhysChem"] @@ -147,7 +152,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_sites( @@ -202,7 +207,7 @@ def what_sites( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_organizations( @@ -253,7 +258,7 @@ def what_organizations( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_projects(ssl_check=True, legacy=True, **kwargs): @@ -300,7 +305,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activities( @@ -364,7 +369,7 @@ def what_activities( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_detection_limits( @@ -422,7 +427,7 @@ def what_detection_limits( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_habitat_metrics( @@ -473,7 +478,7 @@ def what_habitat_metrics( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_project_weights(ssl_check=True, legacy=True, **kwargs): @@ -525,7 +530,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): @@ -577,7 +582,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def wqp_url(service): @@ -615,17 +620,20 @@ class WQP_Metadata(BaseMetadata): ---------- url : str Response url - query_time : datetme.timedelta + query_time : datetime.timedelta Response elapsed time header : requests.structures.CaseInsensitiveDict Response headers - comments : None - Metadata comments. WQP does not return comments. - site_info : tuple[pd.DataFrame, NWIS_Metadata] | None - Site information if the query included `sites`, `site` or `site_no`. + comment : None + Metadata comment. WQP does not return comments. + site_info : tuple[pd.DataFrame, WQP_Metadata] | None + Site information if the query included a site filter (`siteid`, + `sites`, `site`, or `site_no`). """ - def __init__(self, response, **parameters) -> None: + def __init__( + self, response, legacy: bool = True, ssl_check: bool = True, **parameters + ) -> None: """Generates a standard set of metadata informed by the response with specific metadata for WQP data. @@ -633,9 +641,17 @@ def __init__(self, response, **parameters) -> None: ---------- response : Response Response object from requests module - + legacy : bool + Whether the originating request used the legacy WQX endpoint. + Forwarded to ``what_sites`` when ``site_info`` is accessed so the + resolved station metadata uses the same profile as the original + query. + ssl_check : bool + Whether the originating request verified SSL. Forwarded to + ``what_sites`` for consistency. parameters : dict - Unpacked dictionary of the parameters supplied in the request + Unpacked dictionary of the remaining parameters supplied in the + request. Returns ------- @@ -647,15 +663,19 @@ def __init__(self, response, **parameters) -> None: super().__init__(response) self._parameters = parameters - - @property - def site_info(self): - if "sites" in self._parameters: - return what_sites(sites=parameters["sites"]) - elif "site" in self._parameters: - return what_sites(sites=parameters["site"]) - elif "site_no" in self._parameters: - return what_sites(sites=parameters["site_no"]) + self._legacy = legacy + self._ssl_check = ssl_check + + @cached_property + def site_info(self): + for key in _SITE_KEYS: + if key in self._parameters: + return what_sites( + siteid=self._parameters[key], + legacy=self._legacy, + ssl_check=self._ssl_check, + ) + return None def _check_kwargs(kwargs): diff --git a/tests/wqp_test.py b/tests/wqp_test.py index a337f7ec..73ddba22 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -1,9 +1,11 @@ import datetime +from unittest import mock import pytest from pandas import DataFrame from dataretrieval.wqp import ( + WQP_Metadata, _check_kwargs, get_results, what_activities, @@ -218,6 +220,69 @@ def test_check_kwargs(): kwargs = _check_kwargs(kwargs) +def test_wqp_metadata_site_info_property_resolves(requests_mock): + """`site_info` returns the `(DataFrame, WQP_Metadata)` tuple from `what_sites`.""" + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + sites_url = ( + "https://www.waterqualitydata.us/data/Station/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + mock_request(requests_mock, sites_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(siteid="WIDNR_WQX-10032762") + + site_df, site_md = md.site_info + assert isinstance(site_df, DataFrame) + assert site_md.url == sites_url + + +def test_wqp_metadata_site_info_returns_none_without_site_filter(requests_mock): + """`site_info` returns None when no site filter was supplied.""" + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "characteristicName=Chloride&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + _df, md = get_results(characteristicName="Chloride") + assert md.site_info is None + + +def test_wqp_metadata_site_info_uses_wqx3_when_originating_query_was_wqx3( + requests_mock, +): + """`site_info` reuses the originating legacy/WQX3.0 profile.""" + results_url = ( + "https://www.waterqualitydata.us/wqx3/Result/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=fullPhysChem" + ) + sites_wqx3_url = ( + "https://www.waterqualitydata.us/wqx3/Station/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp3_results.txt") + mock_request(requests_mock, sites_wqx3_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(legacy=False, siteid="UTAHDWQ_WQX-4993795") + _site_df, site_md = md.site_info + assert site_md.url == sites_wqx3_url + + +@pytest.mark.parametrize("key", ["siteid", "sites", "site", "site_no"]) +def test_wqp_metadata_site_info_forwards_each_alias_as_siteid(key): + """Every alias key resolves to `what_sites(siteid=value, ...)`.""" + with mock.patch("dataretrieval.wqp.what_sites") as fake_what_sites: + fake_what_sites.return_value = (DataFrame(), mock.Mock()) + md = WQP_Metadata(mock.Mock(), legacy=False, ssl_check=True, **{key: "USGS-X"}) + _ = md.site_info + fake_what_sites.assert_called_once_with( + siteid="USGS-X", legacy=False, ssl_check=True + ) + + def test_get_results_wqx3_preserves_user_dataProfile(requests_mock): """A valid user-supplied WQX3.0 profile must not be overwritten.