From 75ba16cde7182d680069d0595add17b2c8cd495c Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Wed, 6 May 2026 10:07:44 -0500 Subject: [PATCH] Add waterdata.get_field_measurements_metadata Wraps the OGC /collections/field-measurements-metadata collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to get_time_series_metadata. Mirrors R's read_waterdata_field_meta in DOI-USGS/dataRetrieval, with the same output_id ("field_series_id") and parameter list. Body is the standard service-agnostic dispatch through get_ogc_data, with no new infrastructure required. Two live tests cover the single-site happy path and the multi-site POST path. Co-Authored-By: Claude Opus 4.7 (1M context) --- NEWS.md | 2 + dataretrieval/waterdata/__init__.py | 2 + dataretrieval/waterdata/api.py | 117 ++++++++++++++++++++++++++++ tests/waterdata_test.py | 31 ++++++++ 4 files changed, 152 insertions(+) diff --git a/NEWS.md b/NEWS.md index beabe9d8..0517a15a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +**05/06/2026:** Added `waterdata.get_field_measurements_metadata(...)` — wraps the OGC `field-measurements-metadata` collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to `get_time_series_metadata`. Mirrors R's `read_waterdata_field_meta`. + **05/05/2026:** Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`. **05/05/2026:** Added `waterdata.get_samples_summary(monitoringLocationIdentifier=...)` — wraps the Samples database `/summary/{id}` endpoint, returning per-characteristic result and activity counts plus first / most recent activity dates for a single monitoring location. Useful for taking inventory of available discrete-sample data before pulling observations with `get_samples`. diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py index 22fb7d38..4ea7475a 100644 --- a/dataretrieval/waterdata/__init__.py +++ b/dataretrieval/waterdata/__init__.py @@ -17,6 +17,7 @@ get_continuous, get_daily, get_field_measurements, + get_field_measurements_metadata, get_latest_continuous, get_latest_daily, get_monitoring_locations, @@ -48,6 +49,7 @@ "get_continuous", "get_daily", "get_field_measurements", + "get_field_measurements_metadata", "get_latest_continuous", "get_latest_daily", "get_monitoring_locations", diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index b540912d..9ca4ba01 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -1761,6 +1761,123 @@ def get_field_measurements( return get_ogc_data(args, output_id, service) +def get_field_measurements_metadata( + monitoring_location_id: str | list[str] | None = None, + parameter_code: str | list[str] | None = None, + parameter_name: str | list[str] | None = None, + parameter_description: str | list[str] | None = None, + begin: str | list[str] | None = None, + end: str | list[str] | None = None, + last_modified: str | list[str] | None = None, + properties: str | list[str] | None = None, + skip_geometry: bool | None = None, + bbox: list[float] | None = None, + limit: int | None = None, + filter: str | None = None, + filter_lang: FILTER_LANG | None = None, + convert_type: bool = True, +) -> tuple[pd.DataFrame, BaseMetadata]: + """Get field-measurement metadata: one row per (location, parameter) series. + + Each row describes a single field-measurement series — what parameter is + measured at the location, the period of record (``begin`` / ``end``), the + units, and so on — without returning the underlying observations + themselves. Use :func:`get_field_measurements` to fetch the values. + + This is the discrete-measurement analogue to + :func:`get_time_series_metadata` (which describes daily and continuous + series). It's primarily useful for inventory queries: "what + field-measurement parameters does this site have, and over what date + range?" + + See the OpenAPI reference for the full list of supported fields: + https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html#/field-measurements-metadata + The R analogue is ``read_waterdata_field_meta`` in + https://github.com/DOI-USGS/dataRetrieval/. + + Parameters + ---------- + monitoring_location_id : string or list of strings, optional + A unique identifier representing a single monitoring location, in + ``AGENCY-ID`` form (e.g. ``"USGS-02238500"``). + parameter_code : string or list of strings, optional + 5-digit parameter code. See + https://help.waterdata.usgs.gov/codes-and-parameters/parameters. + parameter_name : string or list of strings, optional + A human-understandable name corresponding to ``parameter_code``. + parameter_description : string or list of strings, optional + A human-readable description of what is being measured. + begin, end, last_modified : string, optional + Datetime fields that accept either an RFC 3339 datetime, an + interval (``"start/end"``, optionally half-bounded with ``..``), + or an ISO 8601 duration (e.g. ``"P1M"``, ``"PT36H"``). See + :func:`get_time_series_metadata` for the full grammar. + properties : string or list of strings, optional + Subset of columns to return. Defaults to every available property. + skip_geometry : boolean, optional + Skip per-feature geometries; the returned object will be a plain + ``DataFrame`` with no spatial information. + bbox : list of numbers, optional + Only features whose geometry intersects the bounding box are + selected. Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326 + (longitude / latitude, west-south-east-north). + limit : numeric, optional + Page size; the maximum allowable value is 50000. Default + (``None``) requests the maximum allowable limit. + filter, filter_lang : optional + Server-side CQL filter passed through as the OGC ``filter`` / + ``filter-lang`` query parameters. See + :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking, + and the lexicographic-comparison pitfall. + convert_type : boolean, optional + If True, converts columns to appropriate types. + + Returns + ------- + df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame`` + Formatted data returned from the API query. + md : :obj:`dataretrieval.utils.Metadata` + A custom metadata object pertaining to the query. + + Examples + -------- + .. code:: + + >>> # All field-measurement series at a surface-water site + >>> df, md = dataretrieval.waterdata.get_field_measurements_metadata( + ... monitoring_location_id="USGS-02238500" + ... ) + + >>> # Same, for a groundwater well + >>> df, md = dataretrieval.waterdata.get_field_measurements_metadata( + ... monitoring_location_id="USGS-375907091432201" + ... ) + + >>> # Multi-site, narrowed to two parameter codes + >>> df, md = dataretrieval.waterdata.get_field_measurements_metadata( + ... monitoring_location_id=[ + ... "USGS-451605097071701", + ... "USGS-263819081585801", + ... ], + ... parameter_code=["62611", "72019"], + ... ) + + >>> # Series modified in the last year — useful for incremental ETL + >>> df, md = dataretrieval.waterdata.get_field_measurements_metadata( + ... monitoring_location_id="USGS-375907091432201", + ... parameter_code="72019", + ... last_modified="P1Y", + ... ) + + """ + service = "field-measurements-metadata" + output_id = "field_series_id" + + args = _get_args(locals()) + + return get_ogc_data(args, output_id, service) + + def get_reference_table( collection: str, limit: int | None = None, diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index a77afeaa..2ab5ddf0 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -13,6 +13,7 @@ get_continuous, get_daily, get_field_measurements, + get_field_measurements_metadata, get_latest_continuous, get_latest_daily, get_monitoring_locations, @@ -368,6 +369,36 @@ def test_get_combined_metadata_multi_site_post(): assert (df["parameter_code"] == "00060").all() +def test_get_field_measurements_metadata(): + df, md = get_field_measurements_metadata( + monitoring_location_id="USGS-02238500", skip_geometry=True + ) + assert "field_series_id" in df.columns + assert "begin" in df.columns + assert "end" in df.columns + assert (df["monitoring_location_id"] == "USGS-02238500").all() + assert hasattr(md, "url") + assert hasattr(md, "query_time") + + +def test_get_field_measurements_metadata_multi_site(): + df, _ = get_field_measurements_metadata( + monitoring_location_id=[ + "USGS-07069000", + "USGS-07064000", + "USGS-07068000", + ], + parameter_code="00060", + skip_geometry=True, + ) + assert (df["parameter_code"] == "00060").all() + assert set(df["monitoring_location_id"].unique()) == { + "USGS-07069000", + "USGS-07064000", + "USGS-07068000", + } + + def test_get_reference_table(): df, md = get_reference_table("agency-codes") assert "agency_code" in df.columns