Skip to content
121 changes: 53 additions & 68 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import warnings
from io import StringIO
from json import JSONDecodeError

import pandas as pd
import requests
Expand Down Expand Up @@ -44,6 +45,26 @@
_CRS = "EPSG:4269"


def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
"""Parse a JSON NWIS response, raising a helpful error on HTML responses."""
try:
return _read_json(response.json())
except (ValueError, JSONDecodeError) as e:
text_lower = response.text.lower()
content_type = response.headers.get("Content-Type", "").lower()
if (
"<html>" in text_lower
or "<!doctype" in text_lower
or "text/html" in content_type
):
raise ValueError(
f"Received HTML response instead of JSON from {response.url} "
f"(Status: {response.status_code}). This often indicates "
"that the service is currently unavailable."
) from e
raise


def format_response(
df: pd.DataFrame, service: str | None = None, **kwargs
) -> pd.DataFrame:
Expand Down Expand Up @@ -481,7 +502,7 @@ def get_dv(
kwargs["multi_index"] = multi_index

response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
df = _read_json(response.json())
df = _parse_json_or_raise(response)

return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)

Expand Down Expand Up @@ -667,7 +688,7 @@ def get_iv(
service="iv", format="json", ssl_check=ssl_check, **kwargs
)

df = _read_json(response.json())
df = _parse_json_or_raise(response)
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)


Expand Down Expand Up @@ -840,11 +861,11 @@ def get_record(
- 'iv' : instantaneous data
- 'dv' : daily mean data
- 'site' : site description
- 'measurements' : discharge measurements
- 'measurements' : (defunct) use `waterdata.get_field_measurements`
- 'peaks': discharge peaks
- 'gwlevels': groundwater levels
- 'pmcodes': get parameter codes
- 'water_use': get water use data
- 'gwlevels': (defunct) use `waterdata.get_field_measurements`
- 'pmcodes': (defunct) use `get_reference_table`
- 'water_use': (defunct) no replacement available
- 'ratings': get rating table
- 'stat': get statistics
ssl_check: bool, optional
Expand All @@ -870,29 +891,10 @@ def get_record(
>>> # Get site description for site 01585200
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")

>>> # Get discharge measurements for site 01585200
>>> df = dataretrieval.nwis.get_record(
... sites="01585200", service="measurements"
... )

>>> # Get discharge peaks for site 01585200
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")

>>> # Get latest groundwater level for site 434400121275801
>>> df = dataretrieval.nwis.get_record(
... sites="434400121275801", service="gwlevels"
... )

>>> # Get information about the discharge parameter code
>>> df = dataretrieval.nwis.get_record(
... service="pmcodes", parameterCd="00060"
... )

>>> # Get water use data for livestock nationally in 2010
>>> df = dataretrieval.nwis.get_record(
... service="water_use", years="2010", categories="L"
... )

>>> # Get rating table for USGS streamgage 01585200
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings")

Expand All @@ -907,6 +909,18 @@ def get_record(
"""
_check_sites_value_types(sites)

defunct_replacements = {
"measurements": "`waterdata.get_field_measurements`",
"gwlevels": "`waterdata.get_field_measurements`",
"pmcodes": "`waterdata.get_reference_table`",
"water_use": "no replacement available",
}
if service in defunct_replacements:
raise NameError(
f"The NWIS service '{service}' is no longer supported by "
f"get_record. Use {defunct_replacements[service]} instead."
)

if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
raise TypeError(f"Unrecognized service: {service}")

Comment thread
thodson-usgs marked this conversation as resolved.
Expand Down Expand Up @@ -936,43 +950,6 @@ def get_record(
df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs)
return df

elif service == "measurements":
df, _ = get_discharge_measurements(
site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs
)
return df

elif service == "peaks":
df, _ = get_discharge_peaks(
site_no=sites,
begin_date=start,
end_date=end,
multi_index=multi_index,
ssl_check=ssl_check,
**kwargs,
)
return df

elif service == "gwlevels":
df, _ = get_gwlevels(
sites=sites,
startDT=start,
endDT=end,
multi_index=multi_index,
datetime_index=datetime_index,
ssl_check=ssl_check,
**kwargs,
)
return df

elif service == "pmcodes":
df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs)
return df

elif service == "water_use":
df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs)
return df

elif service == "ratings":
df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs)
return df
Expand Down Expand Up @@ -1167,8 +1144,8 @@ class NWIS_Metadata(BaseMetadata):
Site information if the query included `site_no`, `sites`, `stateCd`,
`huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if
both are present.
variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None
Variable information if the query included `parameterCd`.
variable_info: None
Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.

"""

Expand Down Expand Up @@ -1232,7 +1209,15 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
return None # don't set metadata site_info attribute

@property
def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
# define variable_info metadata based on parameterCd if available
if "parameterCd" in self._parameters:
return get_pmcodes(parameterCd=self._parameters["parameterCd"])
def variable_info(self) -> None:
"""
Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.
Returns None.
"""
warnings.warn(
"Accessing variable_info via NWIS_Metadata is deprecated as "
"it relies on the defunct get_pmcodes function.",
DeprecationWarning,
stacklevel=2,
)
return None
Comment thread
thodson-usgs marked this conversation as resolved.
17 changes: 11 additions & 6 deletions dataretrieval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import warnings
from collections.abc import Iterable

import pandas as pd
import requests
Expand Down Expand Up @@ -39,14 +40,13 @@ def to_str(listlike, delimiter=","):
'0+10+42'

"""
if isinstance(listlike, list):
return delimiter.join([str(x) for x in listlike])
if isinstance(listlike, str):
return listlike

elif isinstance(listlike, (pd.core.series.Series, pd.core.indexes.base.Index)):
return delimiter.join(listlike.tolist())
if isinstance(listlike, Iterable):
return delimiter.join(map(str, listlike))

elif isinstance(listlike, str):
return listlike
return None


def format_datetime(df, date_field, time_field, tz_field):
Expand Down Expand Up @@ -212,6 +212,11 @@ def query(url, payload, delimiter=",", ssl_check=True):
+ f"API response reason: {_reason}. Pseudo-code example of how to "
+ f"split your query: \n {_example}"
)
elif response.status_code in [500, 502, 503]:
raise ValueError(
f"Service Unavailable: {response.status_code} {response.reason}. "
+ f"The service at {response.url} may be down or experiencing issues."
)
Comment thread
thodson-usgs marked this conversation as resolved.

if response.text.startswith("No sites/data"):
raise NoSitesError(response.url)
Expand Down
64 changes: 14 additions & 50 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
SAMPLES_URL,
_check_profiles,
_default_headers,
_get_args,
get_ogc_data,
get_stats_data,
)
Expand Down Expand Up @@ -208,11 +209,7 @@ def get_daily(
output_id = "daily_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -378,11 +375,7 @@ def get_continuous(
output_id = "continuous_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -673,11 +666,7 @@ def get_monitoring_locations(
output_id = "monitoring_location_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -893,11 +882,7 @@ def get_time_series_metadata(
output_id = "time_series_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -1069,11 +1054,7 @@ def get_latest_continuous(
output_id = "latest_continuous_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -1247,11 +1228,7 @@ def get_latest_daily(
output_id = "latest_daily_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -1424,11 +1401,7 @@ def get_field_measurements(
output_id = "field_measurement_id"

# Build argument dictionary, omitting None values
args = {
k: v
for k, v in locals().items()
if k not in {"service", "output_id"} and v is not None
}
args = _get_args(locals())

return get_ogc_data(args, output_id, service)

Expand Down Expand Up @@ -1735,11 +1708,8 @@ def get_samples(

_check_profiles(service, profile)

params = {
k: v
for k, v in locals().items()
if k not in ["ssl_check", "service", "profile"] and v is not None
}
# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"ssl_check", "profile"})

params.update({"mimeType": "text/csv"})

Expand Down Expand Up @@ -1879,11 +1849,8 @@ def get_stats_por(
... end_date="01-31",
... )
"""
params = {
k: v
for k, v in locals().items()
if k not in ["expand_percentiles"] and v is not None
}
# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"expand_percentiles"})

return get_stats_data(
args=params, service="observationNormals", expand_percentiles=expand_percentiles
Expand Down Expand Up @@ -2011,11 +1978,8 @@ def get_stats_date_range(
... computation_type=["minimum", "maximum"],
... )
"""
params = {
k: v
for k, v in locals().items()
if k not in ["expand_percentiles"] and v is not None
}
# Build argument dictionary, omitting None values
params = _get_args(locals(), exclude={"expand_percentiles"})

return get_stats_data(
args=params,
Expand Down
Loading
Loading