From 0baddb0d65f62e727fbc5620d116a0eba0e8acbe Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 29 Apr 2026 16:17:32 -0500 Subject: [PATCH 01/29] add more python --- tutorials/quick_intro_deck.qmd | 77 ++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index b332a4427..d8d37fdd3 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -190,6 +190,10 @@ from dataretrieval import waterdata ## Documentation within R: function help pages {.smaller} +::: {.panel-tabset} + +### R + Within R, you can call help files for any `dataRetrieval` function: ```{r} @@ -198,6 +202,17 @@ Within R, you can call help files for any `dataRetrieval` function: ?read_waterdata_daily ``` +### Python + +Within Python, you can call help for any `dataRetrieval` function: + +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) +``` + +::: + :::: {.columns} ::: {.column width="50%"} @@ -215,6 +230,10 @@ Scroll down to the "Examples" to see how each function can be run. Examples +::: {.panel-tabset} + +### R + ```{r} #| eval: false site <- "USGS-02238500" @@ -225,6 +244,20 @@ dv_data_sf <- read_waterdata_daily( ) ``` +### Python + +```{python} +#| eval: false +df, md = dataretrieval.waterdata.get_daily( + + monitoring_location_id="USGS-02238500", + parameter_code="00060", + time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", +) +``` + +::: + ::: :::: @@ -442,14 +475,14 @@ site = "USGS-09405500" pcode = "00060" # Discharge stat_cd = "00003" # Mean -df = waterdata.get_daily( +df, md = waterdata.get_daily( monitoring_location_id=site, parameter_code=pcode, statistic_id=stat_cd, time="2024-10-01/..", ) -df[0].shape[0] +df.shape[0] ``` @@ -604,20 +637,22 @@ qw_data <- read_waterdata_samples( ncol(qw_data) ``` +R generates a few POSIXct columns to combine date, time, timezone information. 
+ ### Python ```{python} #| eval: !expr evaluate_python site = "USGS-01631000" pcode = "00660" -qw_data = waterdata.get_samples( - monitoringLocationIdentifier=site, - usgsPCode=pcode, - service="results", - profile="basicphyschem", +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + service = "results", + profile = "basicphyschem", ) -qw_data[0].shape[1] +qw_data.shape[1] ``` ::: @@ -676,12 +711,16 @@ dt_me(df, escape = FALSE, paging = FALSE) * We'll look at Suisun Bay a Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. -## Workflow 3: Continuous data for known site +## Workflow 3: Continuous data for known site {.smaller} :::: {.columns} ::: {.column width="65%"} +::: {.panel-tabset} + +### R + ```{r} #| results: markup site_id <- "USGS-11455508" @@ -694,8 +733,28 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) +nrow(continuous_data) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = date_range +) +continuous_data.shape[0] + +``` + +::: + ::: ::: {.column width="35%"} From 20fd8f609f498a7f6fc0fcdbfbcfb4b41d982ae3 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 29 Apr 2026 17:32:16 -0500 Subject: [PATCH 02/29] will this just work? 
--- environment.yml | 2 +- tutorials/quick_intro_deck.qmd | 33 +++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index 823381120..2244b414b 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,7 @@ dependencies: - pip=25.2=pyh8b19718_0 - python=3.11.8=hab00c5b_0_cpython - readline=8.2=h8c095d6_2 - - scipy + - matplotlib - setuptools=80.9.0=pyhff2d567_0 - tk=8.6.13=noxft_hd72426e_102 - tzdata=2025b=h78e105d_0 diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index d8d37fdd3..125507745 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -34,6 +34,9 @@ library(ggplot2) library(dplyr) library(reticulate) py_require("dataretrieval") +py_require("pandas") +py_require("matplotlib") + options(dplyr.summarise.inform = FALSE) evaluate_python <- params$run_python @@ -248,7 +251,7 @@ dv_data_sf <- read_waterdata_daily( ```{python} #| eval: false -df, md = dataretrieval.waterdata.get_daily( +df, md = waterdata.get_daily( monitoring_location_id="USGS-02238500", parameter_code="00060", @@ -441,7 +444,7 @@ We're going walk through 3 retrievals: ## Workflow 1: Daily data for known site {.smaller} -Let's pull daily mean discharge data for site "USGS-0940550", getting all the data from October 10, 2024 onward. +Let's pull daily mean discharge data for site "USGS-09405500", getting all the data from October 1, 2025 onward. 
::: {.panel-tabset} @@ -453,7 +456,7 @@ library(dataRetrieval) site <- "USGS-09405500" pcode <- "00060" # Discharge stat_cd <- "00003" # Mean -range <- c("2024-10-01", NA) +range <- c("2025-10-01", NA) df <- read_waterdata_daily( monitoring_location_id = site, @@ -479,7 +482,7 @@ df, md = waterdata.get_daily( monitoring_location_id=site, parameter_code=pcode, statistic_id=stat_cd, - time="2024-10-01/..", + time="2025-10-01/..", ) df.shape[0] @@ -511,6 +514,10 @@ dt_me( ## Workflow 1: Plot Daily Data +::: {.panel-tabset} + +### R + Let's use `ggplot2` to visualize the data. ```{r} @@ -522,6 +529,24 @@ ggplot(data = df) + geom_point(aes(x = time, y = value, color = approval_status)) ``` +### Python + +Let's use `matplotlib` to visualize the data. + +```{python} +#| echo: true +#| output-location: column +import matplotlib.pyplot as plt +import pandas as pd + +df["approval_status"] = pd.Categorical(df["approval_status"]).codes + +plt.scatter(x=df.time, y=df.value, c=df.approval_status) +``` + + +::: + ## Water Data API Notes: Argument input Use your "tab" key! From 7584089e11a30898c87c057c0f986c0571ed5030 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 07:53:54 -0500 Subject: [PATCH 03/29] PEAKS! 
--- .Rbuildignore | 2 + .gitignore | 1 + NAMESPACE | 1 + R/AAA.R | 3 +- R/dataRetrieval-package.R | 2 +- R/read_waterdata_peaks.R | 81 +++++++++++ man/read_waterdata_field_measurements.Rd | 2 +- man/read_waterdata_peaks.Rd | 168 +++++++++++++++++++++++ 8 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 R/read_waterdata_peaks.R create mode 100644 man/read_waterdata_peaks.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 819a77239..bb231a66a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -71,3 +71,5 @@ vignettes/Reference_Lists.Rmd ^[.]?air[.]toml$ ^\.vscode$ environment.yml +^\.positai$ +^\.claude$ diff --git a/.gitignore b/.gitignore index 02d0a38fd..892ca673e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ vignettes/*.html vignettes/*.R /.quarto/ **/*.quarto_ipynb +.positai diff --git a/NAMESPACE b/NAMESPACE index 4817ef167..5db89fe84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,6 +55,7 @@ export(read_waterdata_latest_daily) export(read_waterdata_metadata) export(read_waterdata_monitoring_location) export(read_waterdata_parameter_codes) +export(read_waterdata_peaks) export(read_waterdata_ratings) export(read_waterdata_samples) export(read_waterdata_stats_daterange) diff --git a/R/AAA.R b/R/AAA.R index e02bd1987..d2ed38956 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -24,7 +24,8 @@ pkg.env <- new.env() "continuous", "field-measurements-metadata", "combined-metadata", - "channel-measurements" + "channel-measurements", + "peaks" ) collections <- c( "parameter-codes", diff --git a/R/dataRetrieval-package.R b/R/dataRetrieval-package.R index 9eda9c1f0..de5ae89e9 100644 --- a/R/dataRetrieval-package.R +++ b/R/dataRetrieval-package.R @@ -207,7 +207,7 @@ NULL # "monitoring-locations", "latest-continuous", # "field-measurements", "latest-daily", # "continuous", "field-measurements-metadata", -# "combined-metadata", "channel-measurements") +# "combined-metadata", "channel-measurements", "peaks") # # property_list <- list() # for(service in 
services){ diff --git a/R/read_waterdata_peaks.R b/R/read_waterdata_peaks.R new file mode 100644 index 000000000..7ea265d8a --- /dev/null +++ b/R/read_waterdata_peaks.R @@ -0,0 +1,81 @@ +#' Get USGS Peak Data +#' +#' @description `r get_description("peaks")` +#' +#' @export +#' @param monitoring_location_id `r get_ogc_params("peaks")$monitoring_location_id` +#' Multiple monitoring_location_ids can be requested as a character vector. +#' @param parameter_code `r get_ogc_params("peaks")$parameter_code` +#' Multiple parameter_codes can be requested as a character vector. +#' @param time `r get_ogc_params("peaks")$time` +#' +#' See also Details below for more information. +#' @param value `r get_ogc_params("peaks")$value` +#' @param unit_of_measure `r get_ogc_params("peaks")$unit_of_measure` +#' @param time_series_id `r get_ogc_params("peaks")$time_series_id` +#' @param last_modified `r get_ogc_params("peaks")$last_modified` +#' +#' See also Details below for more information. +#' @param water_year `r get_ogc_params("peaks")$water_year` +#' @param year `r get_ogc_params("peaks")$year` +#' @param month `r get_ogc_params("peaks")$month` +#' @param day `r get_ogc_params("peaks")$day` +#' @param time_of_day `r get_ogc_params("peaks")$time_of_day` +#' @param peak_since `r get_ogc_params("peaks")$peak_since` +#' @param properties A vector of requested columns to be returned from the query. +#' Available options are: +#' `r dataRetrieval:::get_properties_for_docs("peaks", "peak_id")`. +#' The default (`NA`) will return all columns of the data. 
+#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api +#' +#' @inherit read_waterdata_continuous details +#' +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' wi_peaks <- read_waterdata_combined_meta( +#' state_name = "Wisconsin", +#' data_type = "Peaks", +#' parameter_code = "00060") +#' +#' site <- wi_peaks$monitoring_location_id[1] +#' dv_data_sf <- read_waterdata_peaks( +#' monitoring_location_id = site, +#' parameter_code = "00060") +#' +#' } +read_waterdata_peaks <- function( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + unit_of_measure = NA_character_, + value = NA, + last_modified = NA_character_, + water_year = NA_character_, + year = NA_character_, + month = NA_character_, + day = NA_character_, + time_of_day = NA_character_, + peak_since = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { + service <- "peaks" + output_id <- "peak_id" + rlang::check_dots_empty() + + args <- mget(names(formals())) + return_list <- get_ogc_data(args, output_id, service) + + return(return_list) +} diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index b4de841a8..0a29719d5 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -50,7 +50,7 @@ The default (\code{NA}) will return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} -\item{field_measurements_series_id}{A unique identifier representing a single collection series. 
This corresponds to the \code{id} field in the \code{field-measurements-metadata} endpoint. Collection series are defined as the set of field measurements at a given monitoring location for a single parameter code using a single reading type.} +\item{field_measurements_series_id}{A unique identifier representing a single collection series. This corresponds to the \code{id} field in the \code{field-measurements-metadata} endpoint. Collection series are defined as the set of field measurements at a given monitoring location for a single parameter code using a single reading type.} \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. 
For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} diff --git a/man/read_waterdata_peaks.Rd b/man/read_waterdata_peaks.Rd new file mode 100644 index 000000000..5a3c77e53 --- /dev/null +++ b/man/read_waterdata_peaks.Rd @@ -0,0 +1,168 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_peaks.R +\name{read_waterdata_peaks} +\alias{read_waterdata_peaks} +\title{Get USGS Peak Data} +\usage{ +read_waterdata_peaks( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + unit_of_measure = NA_character_, + value = NA, + last_modified = NA_character_, + water_year = NA_character_, + year = NA_character_, + month = NA_character_, + day = NA_character_, + time_of_day = NA_character_, + peak_since = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). + +Multiple monitoring_location_ids can be requested as a character vector.} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. 
A complete list of parameter codes and associated groupings can be found at \url{https://api.waterdata.usgs.gov/ogcapi/v0/collections/parameter-codes/items}. + +Multiple parameter_codes can be requested as a character vector.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, time_series_id, monitoring_location_id, parameter_code, peak_id, unit_of_measure, value, last_modified, time, water_year, year, month, day, time_of_day, peak_since. +The default (\code{NA}) will return all columns of the data.} + +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} + +\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} + +\item{value}{The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision.} + +\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed. +You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{last_modified} that intersects the value of datetime are selected. 
+ +See also Details below for more information.} + +\item{water_year}{The water year (running from October 1st to September 30th) a peak occurred.} + +\item{year}{The calendar year a peak occurred.} + +\item{month}{The calendar month a peak occurred. If null, the month a peak occurred is unknown.} + +\item{day}{The day of the month a peak occurred. If null, the day a peak occurred is unknown.} + +\item{time_of_day}{The time of day a peak occurred. If null, the time of day a peak occurred is unknown.} + +\item{peak_since}{If not null, this record represents the peak value for the parameter code since the year contained in "peak_since".} + +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} + +\item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties. 
+ +See also Details below for more information.} + +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected.The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most longitude).} + +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. 
+If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} +} +\description{ +Annual peak flow values are the maximum instantaneous streamflow values recorded at a particular site for the entire water year from October 1 to September 30. Note that the annual peak flow value may not occur at the same time the maximum water level occurs due to conditions such as backwater, tidal fluctuations, etc. +} +\details{ +You can also use a vector of length 2 for any time queries (such as time +or last_modified). The first value is the starting date (or datetime), +the second value is the ending date(or datetime). +NA's within the vector indicate a half-bound date. +For example, \code{time = c("2024-01-01", NA)} will return all data starting +at 2024-01-01. +\code{time = c(NA, "2024-01-01")} will return all data from the beginning of +the timeseries until 2024-01-01. +By default, time is assumed UTC, although time zone attributes +will be accommodated. As an example, setting \code{time = as.POSIXct(c("2021-01-01 12:00:00", +"2021-01-01 14:00"), tz = "America/New_York")} will request data that between +noon and 2pm eastern time on 2021-01-01. +All time values RETURNED from the service are UTC with the exception of +daily data, which returns time values in local dates. 
+} +\examples{ +\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} + +\donttest{ +wi_peaks <- read_waterdata_combined_meta( + state_name = "Wisconsin", + data_type = "Peaks", + parameter_code = "00060") + + +dv_data_sf <- read_waterdata_peaks( + monitoring_location_id = site, + parameter_code = "00060") + +} +\dontshow{\}) # examplesIf} +} From aa4a2a65f070236c805270f43bd4bd4627a70e64 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 08:22:25 -0500 Subject: [PATCH 04/29] Let's see about getting slides to render --- .github/workflows/pkgdown.yaml | 8 +- environment.yml | 36 ++++ tutorials/quick_intro_deck.qmd | 381 +++++++++++++++++++++++++-------- 3 files changed, 328 insertions(+), 97 deletions(-) create mode 100644 environment.yml diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 4491fc348..b5a0a8012 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -46,11 +46,9 @@ jobs: any::pkgdown any::rcmdcheck any::DT - any::data.table any::dplyr any::tidyr any::ggplot2 - any::zoo any::sf any::patchwork any::maps @@ -59,6 +57,12 @@ jobs: any::gridExtra local::. 
needs: website + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: environment.yml + cache-environment: false + cache-downloads: false - name: Create public directory run: | mkdir public diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..2244b414b --- /dev/null +++ b/environment.yml @@ -0,0 +1,36 @@ +name: dataretrieval +channels: + - conda-forge +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - bzip2=1.0.8=hda65f42_8 + - ca-certificates=2025.8.3=hbd8a1cb_0 + - ld_impl_linux-64=2.44=h1423503_1 + - libexpat=2.7.1=hecca717_0 + - libffi=3.4.6=h2dba641_1 + - libgcc=15.1.0=h767d61c_5 + - libgcc-ng=15.1.0=h69a702a_5 + - libgomp=15.1.0=h767d61c_5 + - liblzma=5.8.1=hb9d3cd8_2 + - liblzma-devel=5.8.1=hb9d3cd8_2 + - libnsl=2.0.1=hb9d3cd8_1 + - libsqlite=3.50.4=h0c1763c_0 + - libuuid=2.41.1=he9a06e4_0 + - libxcrypt=4.4.36=hd590300_1 + - libzlib=1.3.1=hb9d3cd8_2 + - ncurses=6.5=h2d0b736_3 + - openssl=3.5.3=h26f9b46_0 + - pip=25.2=pyh8b19718_0 + - python=3.11.8=hab00c5b_0_cpython + - readline=8.2=h8c095d6_2 + - matplotlib + - setuptools=80.9.0=pyhff2d567_0 + - tk=8.6.13=noxft_hd72426e_102 + - tzdata=2025b=h78e105d_0 + - wheel=0.45.1=pyhd8ed1ab_1 + - xz=5.8.1=hbcc6ac9_2 + - xz-gpl-tools=5.8.1=hbcc6ac9_2 + - xz-tools=5.8.1=hb9d3cd8_2 + - dataretrieval +prefix: /home/user/miniforge3/envs/dataretrieval diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 7046eef8a..125507745 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -15,44 +15,64 @@ title-slide-attributes: data-background-size: 15% data-background-position: 2% 2% editor: source +engine: knitr editor_options: chunk_output_type: console execute: echo: true warning: false message: false +params: + run_python: true --- ```{r} #| echo: false #| include: false -# library(dataRetrieval) +#| # library(dataRetrieval) library(ggplot2) library(dplyr) 
+library(reticulate) +py_require("dataretrieval") +py_require("pandas") +py_require("matplotlib") + options(dplyr.summarise.inform = FALSE) -dt_me <- function(x, - page_length = 8, - paging = TRUE, - font = "0.7em", - escape = TRUE){ - DT::datatable(x, - rownames = FALSE, - options = list(pageLength = page_length, - info = FALSE, - searching = FALSE, - paging = paging, - lengthChange = FALSE, - initComplete = htmlwidgets::JS( - "function(settings, json) {", - paste0("$(this.api().table().container()).css({'font-size': '", - font, "'});"), - "}")), escape = escape) +evaluate_python <- params$run_python + +dt_me <- function( + x, + page_length = 8, + paging = TRUE, + font = "0.7em", + escape = TRUE +) { + DT::datatable( + x, + rownames = FALSE, + options = list( + pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0( + "$(this.api().table().container()).css({'font-size': '", + font, + "'});" + ), + "}" + ) + ), + escape = escape + ) } theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) - +update_geom_defaults("point", list(size = 3)) ``` @@ -111,11 +131,14 @@ In this ~45 minute introduction, the goal is: `dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. 
To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: +::: {.panel-tabset} + +### R + ```{r} #| echo: true #| eval: false install.packages("dataRetrieval") - ``` Then each time you open R, you'll need to load the library: @@ -125,6 +148,24 @@ Then each time you open R, you'll need to load the library: library(dataRetrieval) ``` +### Python + +```{bash} +#| echo: true +#| eval: false +pip install dataretrieval + +``` + +Then each time you open Python, you'll need to load the library: + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata +``` + +::: + ::: footer ::: @@ -152,14 +193,29 @@ library(dataRetrieval) ## Documentation within R: function help pages {.smaller} +::: {.panel-tabset} + +### R + Within R, you can call help files for any `dataRetrieval` function: ```{r} #| echo: true #| eval: false -?readWQPdata +?read_waterdata_daily ``` +### Python + +Within Python, you can call help for any `dataRetrieval` function: + +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) +``` + +::: + :::: {.columns} ::: {.column width="50%"} @@ -177,20 +233,36 @@ Scroll down to the "Examples" to see how each function can be run. 
Examples +::: {.panel-tabset} + +### R + ```{r} #| eval: false -# Legacy: -nameToUse <- "pH" -pHData <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse) -ncol(pHData) -attr(pHData, "siteInfo") -attr(pHData, "queryTime") -attr(pHData, "url") +site <- "USGS-02238500" +dv_data_sf <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = "00060", + time = c("2021-01-01", "2022-01-01") +) +``` + +### Python + +```{python} +#| eval: false +df, md = waterdata.get_daily( + + monitoring_location_id="USGS-02238500", + parameter_code="00060", + time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", +) ``` ::: +::: + :::: ::: footer @@ -370,9 +442,13 @@ We're going walk through 3 retrievals: ::: -## Workflow 1: Daily data for known site +## Workflow 1: Daily data for known site {.smaller} + +Let's pull daily mean discharge data for site "USGS-09405500", getting all the data from October 1, 2025 onward. -Let's pull daily mean discharge data for site "USGS-0940550", getting all the data from October 10, 2024 onward. 
+::: {.panel-tabset} + +### R ```{r} #| message: true @@ -380,15 +456,41 @@ library(dataRetrieval) site <- "USGS-09405500" pcode <- "00060" # Discharge stat_cd <- "00003" # Mean -range <- c("2024-10-01", NA) +range <- c("2025-10-01", NA) + +df <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = pcode, + statistic_id = stat_cd, + time = range +) -df <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = pcode, - statistic_id = stat_cd, - time = range) +nrow(df) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata + +site = "USGS-09405500" +pcode = "00060" # Discharge +stat_cd = "00003" # Mean +df, md = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=pcode, + statistic_id=stat_cd, + time="2025-10-01/..", +) + +df.shape[0] ``` + +::: + ::: footer ::: @@ -399,12 +501,11 @@ In RStudio, click on the data frame in the upper right Environment tab to open a ```{r} #| echo: false - -dt_me(df |> - sf::st_drop_geometry(), - page_length = 3) - - +dt_me( + df |> + sf::st_drop_geometry(), + page_length = 3 +) ``` ::: footer @@ -413,6 +514,10 @@ dt_me(df |> ## Workflow 1: Plot Daily Data +::: {.panel-tabset} + +### R + Let's use `ggplot2` to visualize the data. ```{r} @@ -421,12 +526,27 @@ Let's use `ggplot2` to visualize the data. library(ggplot2) ggplot(data = df) + - geom_point(aes(x = time, - y = value, - color = approval_status)) + geom_point(aes(x = time, y = value, color = approval_status)) +``` + +### Python +Let's use `matplotlib` to visualize the data. + +```{python} +#| echo: true +#| output-location: column +import matplotlib.pyplot as plt +import pandas as pd + +df["approval_status"] = pd.Categorical(df["approval_status"]).codes + +plt.scatter(x=df.time, y=df.value, c=df.approval_status) ``` + +::: + ## Water Data API Notes: Argument input Use your "tab" key! @@ -444,9 +564,10 @@ Use your "tab" key! 
```{r} #| eval: false #| echo: true -discharge <- read_waterdata_daily(parameter_code = "00060", - statistic_id = "00003") - +discharge <- read_waterdata_daily( + parameter_code = "00060", + statistic_id = "00003" +) ``` ::: {.fragment} @@ -492,9 +613,11 @@ Here are a bunch of valid inputs: time = "2025-01-01" time = as.Date("2025-01-01") time = "2025-01-01T23:20:50Z" -time = as.POSIXct("2025-01-01T23:20:50Z", - format = "%Y-%m-%dT%H:%M:%S", - tz = "UTC") +time = as.POSIXct( + "2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC" +) # Ask for specific range time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs # Asking beginning of record to specific end: @@ -517,22 +640,48 @@ Use your "tab" key! ![](images/autocomplete_samples.png) -## Workflow 2: Discrete data for known site +## Workflow 2: Discrete data for known site {.smaller} Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal, VA ("USGS-01631000"). +::: {.panel-tabset} + +### R + ```{r} #| message: true site <- "USGS-01631000" pcode <- "00660" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = pcode, - dataType = "results", - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem" +) ncol(qw_data) ``` +R generates a few POSIXct columns to combine date, time, timezone information. + +### Python +```{python} +#| eval: !expr evaluate_python +site = "USGS-01631000" +pcode = "00660" + +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + service = "results", + profile = "basicphyschem", +) + +qw_data.shape[1] +``` + +::: + That's a LOT of columns returned. We won't look at them here, but you can use `View` in RStudio to explore on your own. ::: footer @@ -549,21 +698,31 @@ That's a LOT of columns returned. 
We won't look at them here, but you can use `V ```{r} #| echo: false - -df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), - Description = c("Results data and metadata for measures and observations matching your query", - "Find monitoring locations that have data matching your query", - "Information about the monitoring activities conducted that produced data", - "Information on the projects that have results matching your data query", - "Information about the organizations that have provided data that matches your query"), - dataProfile = c('fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', - 'site
count', - 'sampact
actmetric
actgroup
ncount', - 'project
projectmonitoringlocationweight', - 'organization
count')) +df <- tibble( + dataType = c( + "results", + "locations", + "activities", + "projects", + "organizations" + ), + Description = c( + "Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query" + ), + dataProfile = c( + 'fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', + 'site
count', + 'sampact
actmetric
actgroup
ncount', + 'project
projectmonitoringlocationweight', + 'organization
count' + ) +) dt_me(df, escape = FALSE, paging = FALSE) - ``` ::: footer @@ -577,12 +736,16 @@ dt_me(df, escape = FALSE, paging = FALSE) * We'll look at Suisun Bay a Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. -## Workflow 3: Continuous data for known site +## Workflow 3: Continuous data for known site {.smaller} :::: {.columns} ::: {.column width="65%"} +::: {.panel-tabset} + +### R + ```{r} #| results: markup site_id <- "USGS-11455508" @@ -590,14 +753,35 @@ p_code_rt <- "99133" start_date <- "2024-01-01" end_date <- "2024-06-01" -continuous_data <- read_waterdata_continuous(monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = c(start_date, end_date)) +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) +nrow(continuous_data) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = date_range +) +continuous_data.shape[0] ``` ::: +::: + ::: {.column width="35%"} ``` @@ -626,8 +810,7 @@ https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lan ```{r} #| output-location: column ggplot(data = continuous_data) + - geom_point(aes(x = time, - y = value)) + geom_point(aes(x = time, y = value)) ``` @@ -647,18 +830,19 @@ The next slides will demo how to use those. 
## Data Discovery: Time Series {.smaller} ```{r} -ts_available <- read_waterdata_combined_meta(monitoring_location_id = "USGS-04183500") +ts_available <- read_waterdata_combined_meta( + monitoring_location_id = "USGS-04183500" +) ``` ```{r} #| echo: false - -dt_me(ts_available |> - sf::st_drop_geometry() |> - select(data_type, - parameter_name, - parameter_code, statistic_id, begin, end), page_length = 6) - +dt_me( + ts_available |> + sf::st_drop_geometry() |> + select(data_type, parameter_name, parameter_code, statistic_id, begin, end), + page_length = 6 +) ``` ::: footer @@ -668,19 +852,24 @@ dt_me(ts_available |> ## Data Discovery: Discrete {.smaller} ```{r} -discrete_available <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") - +discrete_available <- summarize_waterdata_samples( + monitoringLocationIdentifier = "USGS-04183500" +) ``` ```{r} #| echo: false - -dt_me(discrete_available |> - select(characteristicUserSupplied, - resultCount, activityCount, - firstActivity, mostRecentActivity), - page_length = 6) - +dt_me( + discrete_available |> + select( + characteristicUserSupplied, + resultCount, + activityCount, + firstActivity, + mostRecentActivity + ), + page_length = 6 +) ``` ::: footer @@ -692,8 +881,10 @@ dt_me(discrete_available |> * characteristicUserSupplied can be an input to `read_waterdata_sample` ```{r} -discrete1 <- read_waterdata_samples(characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", - monitoringLocationIdentifier = "USGS-04183500") +discrete1 <- read_waterdata_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) nrow(discrete1) ``` From d8d7f5d5f638a893b9f6d37259879d0824e18ebd Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 08:24:28 -0500 Subject: [PATCH 05/29] remove oooold articles --- _pkgdown.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 
ed2f068e0..f16382908 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -58,12 +58,8 @@ navbar: href: articles/wqp_large_pull_script.html - text: Large Request Pipeline Approach href: articles/wqp_large_pull_targets.html - - text: Stat Service - href: articles/statsServiceMap.html - text: NLDI Interface href: articles/nldi.html - - text: Moving Averages - href: articles/movingAverages.html - text: How to Contribute href: articles/Contributing.html right: From 62c19342e6caac670c9b119c3447ac9ca26aae39 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 08:53:11 -0500 Subject: [PATCH 06/29] RStudio update --- .Rbuildignore | 2 ++ .gitignore | 1 + 2 files changed, 3 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index 7ad0cd654..24ec5b01e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -69,3 +69,5 @@ vignettes/continuous_pr.Rmd vignettes/quick_slides.Rmd ^[.]?air[.]toml$ ^\.vscode$ +^\.positai$ +^\.claude$ diff --git a/.gitignore b/.gitignore index 6debd82a7..0eade678e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ vignettes/*.R /.quarto/ **/*.quarto_ipynb +.positai From 05f34a4afeb6d2789c48903e514a6a9070a0ed85 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 09:11:46 -0500 Subject: [PATCH 07/29] add peaks --- _pkgdown.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/_pkgdown.yml b/_pkgdown.yml index df8a331bf..6dc8a7711 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -90,6 +90,7 @@ reference: - read_waterdata_field_meta - read_waterdata_combined_meta - read_waterdata_ratings + - read_waterdata_peaks - title: National Water Information System (NWIS) desc: Functions to retrieve (USGS) NWIS data. These will be slowly phased out and replaced with the read_waterdata family of functions. 
contents: From dbb4a153696de4c51e489c526d9b0b51765471e5 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 09:13:05 -0500 Subject: [PATCH 08/29] removing old vignettes --- vignettes/movingAverages.Rmd | 278 ---------------------------------- vignettes/statsServiceMap.Rmd | 197 ------------------------ 2 files changed, 475 deletions(-) delete mode 100644 vignettes/movingAverages.Rmd delete mode 100644 vignettes/statsServiceMap.Rmd diff --git a/vignettes/movingAverages.Rmd b/vignettes/movingAverages.Rmd deleted file mode 100644 index 96b024b4e..000000000 --- a/vignettes/movingAverages.Rmd +++ /dev/null @@ -1,278 +0,0 @@ ---- -title: "Calculating Moving Averages and Historical Flow Quantiles" -author: "Laura DeCicco" -date: "2016-10-25" -output: - rmarkdown::html_vignette: - toc: true - fig_caption: yes - fig_height: 7 - fig_width: 7 -vignette: > - %\VignetteIndexEntry{Calculating Moving Averages and Historical Flow Quantiles} - \usepackage[utf8]{inputenc} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - -**WARNING** - -This post is very old! A better way to do all these plots and calculations can be found here: - -**WARNING** - -This post will show simple way to calculate moving averages, calculate historical-flow quantiles, and plot that information. The goal is to reproduce the graph at this link: -[PA Graph](http://pa.water.usgs.gov/drought/indicators/sw/images/f30_01538000.html). The motivation for this post was inspired by a USGS colleague that that is considering creating these type of plots in R. We thought this plot provided an especially fun challenge - maybe you will, too! - -First we get the data using the [dataRetrieval](https://CRAN.R-project.org/package=dataRetrieval) package. The siteNumber and parameterCd could be adjusted for other sites or measured parameters. In this example, we are getting discharge (parameter code 00060) at a site in PA. 
- -It may be important to note that this script is a bit lazy in handling leap days. - -## Get data using dataRetrieval - -```{r message=FALSE} -library(dataRetrieval) - -# Retrieve daily Q -siteNumber <- c("01538000") -parameterCd <- "00060" # Discharge -dailyQ <- readNWISdv(siteNumber, parameterCd) -dailyQ <- renameNWISColumns(dailyQ) -stationInfo <- readNWISsite(siteNumber) -nrow(dailyQ) -``` - -## Calculate moving average - -Next, we calculate a 30-day moving average on all of the flow data: - -```{r message=FALSE} -library(dplyr) -library(zoo) - -# Check for missing days, if so, add NA rows: -if (as.numeric(diff(range(dailyQ$Date))) != (nrow(dailyQ) + 1)) { - fullDates <- seq( - from = min(dailyQ$Date), - to = max(dailyQ$Date), by = "1 day" - ) - fullDates <- data.frame( - Date = fullDates, - agency_cd = unique(dailyQ$agency_cd), - site_no = unique(dailyQ$site_no) - ) - dailyQ <- fullDates %>% - left_join(dailyQ, - by = c("Date", "agency_cd", "site_no") - ) %>% - arrange(Date) -} - -dailyQ <- dailyQ %>% - mutate( - rollMean = rollmean(Flow, 30, fill = NA, align = "center"), - day.of.year = as.numeric(strftime(Date, - format = "%j" - )) - ) -``` - -## Calculate historical percentiles - -We can use the `quantile` function to calculate historical percentile flows. Then use the `loess` function for smoothing. The argument `smooth.span` defines how much smoothing should be applied. To get a smooth transistion at the start of the graph, we can add include an earlier year which is not plotted at the end. 
- -```{r message=FALSE} -summaryQ <- dailyQ %>% - group_by(day.of.year) %>% - summarize( - p75 = quantile(rollMean, probs = .75, na.rm = TRUE), - p25 = quantile(rollMean, probs = .25, na.rm = TRUE), - p10 = quantile(rollMean, probs = 0.1, na.rm = TRUE), - p05 = quantile(rollMean, probs = 0.05, na.rm = TRUE), - p00 = quantile(rollMean, probs = 0, na.rm = TRUE) - ) - -current.year <- as.numeric(strftime(Sys.Date(), format = "%Y")) - -summary.0 <- summaryQ %>% - mutate( - Date = as.Date(day.of.year - 1, - origin = paste0(current.year - 2, "-01-01") - ), - day.of.year = day.of.year - 365 - ) -summary.1 <- summaryQ %>% - mutate(Date = as.Date(day.of.year - 1, - origin = paste0(current.year - 1, "-01-01") - )) -summary.2 <- summaryQ %>% - mutate( - Date = as.Date(day.of.year - 1, - origin = paste0(current.year, "-01-01") - ), - day.of.year = day.of.year + 365 - ) - -summaryQ <- bind_rows(summary.0, summary.1, summary.2) - - -smooth.span <- 0.3 - -summaryQ$sm.75 <- predict(loess(p75 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.25 <- predict(loess(p25 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.10 <- predict(loess(p10 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.05 <- predict(loess(p05 ~ day.of.year, data = summaryQ, span = smooth.span)) -summaryQ$sm.00 <- predict(loess(p00 ~ day.of.year, data = summaryQ, span = smooth.span)) - -latest.years <- dailyQ %>% - filter(Date >= as.Date(paste0(current.year - 1, "-01-01"))) %>% - mutate(day.of.year = seq_len(nrow(.))) - -# Let's just take the middle chunk: -summaryQ <- summaryQ %>% - filter(day.of.year %in% 1:365) - -summaryQ <- summaryQ %>% - bind_rows( - summaryQ, - summaryQ - ) %>% - mutate(day.of.year = seq_len(nrow(.)) - 365) -``` - -## Plot using base R - -Many of the graphical requirements defined by the USGS are difficult to achieve in `ggplot2`. 
Base R plotting can be used to obtain these types of graphs: - -```{r fig.cap="Simple 30-day moving average daily flow plot using base R"} - -title.text <- paste0( - stationInfo$station_nm, "\n", - "Provisional Data - Subject to change\n", - "Record Start = ", min(dailyQ$Date), - " Number of years = ", - as.integer(as.numeric(difftime( - time1 = max(dailyQ$Date), - time2 = min(dailyQ$Date), - units = "weeks" - )) / 52.25), - "\nDate of plot = ", Sys.Date(), - " Drainage Area = ", stationInfo$drain_area_va, "mi^2" -) - -mid.month.days <- c(15, 45, 74, 105, 135, 166, 196, 227, 258, 288, 319, 349) -month.letters <- c("J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D") -start.month.days <- c(1, 32, 61, 92, 121, 152, 182, 214, 245, 274, 305, 335) -label.text <- c("Normal", "Drought Watch", "Drought Warning", "Drought Emergency") - -plot(latest.years$day.of.year, latest.years$rollMean, - ylim = c(1, 1000), xlim = c(1, 733), - log = "y", axes = FALSE, type = "n", xaxs = "i", yaxs = "i", - ylab = "30-day moving ave", - xlab = "" -) -title(title.text, cex.main = 0.75) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.75, rev(summaryQ$sm.25)), - col = "darkgreen", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.25, rev(summaryQ$sm.10)), - col = "yellow", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.10, rev(summaryQ$sm.05)), - col = "orange", border = FALSE -) -polygon(c(summaryQ$day.of.year, rev(summaryQ$day.of.year)), - c(summaryQ$sm.05, rev(summaryQ$sm.00)), - col = "red", border = FALSE -) -lines(latest.years$day.of.year, latest.years$rollMean, - lwd = 2, col = "black" -) -abline(v = 366) -axis(2, las = 1, at = c(1, 100, 1000), tck = -0.02) -axis(2, at = c(seq(1, 90, by = 10)), labels = NA, tck = -0.01) -axis(2, at = c(seq(100, 1000, by = 100)), labels = NA, tck = -0.01) -axis(1, - at = c(mid.month.days, 365 + mid.month.days), - labels = 
rep(month.letters, 2), - tick = FALSE, line = -0.5, cex.axis = 0.75 -) -axis(1, - at = c(start.month.days, 365 + start.month.days), - labels = NA, tck = -0.02 -) -axis(1, - at = c(182, 547), labels = c(current.year - 1, current.year), - line = .5, tick = FALSE -) -legend("bottom", label.text, - horiz = TRUE, - fill = c("darkgreen", "yellow", "orange", "red"), - inset = c(0, 0), xpd = TRUE, bty = "n", cex = 0.75 -) -box() -``` - - -## Plot using ggplot2 - -Finally, we can also try to create the graph using the `ggplot2` package. The following script shows a simple way to re-create the graph in `ggplot2` with no effort on imitating desired style: - - -```{r fig.cap="Simple 30-day moving average daily flow plot using ggplot2", alt.text = "30-day moving average daily flow plot, no effort on style", message=FALSE, warning=FALSE, fig.height=5} -library(ggplot2) - -simple.plot <- ggplot(data = summaryQ, aes(x = day.of.year)) + - geom_ribbon(aes(ymin = sm.25, ymax = sm.75, fill = "Normal")) + - geom_ribbon(aes(ymin = sm.10, ymax = sm.25, fill = "Drought Watch")) + - geom_ribbon(aes(ymin = sm.05, ymax = sm.10, fill = "Drought Warning")) + - geom_ribbon(aes(ymin = sm.00, ymax = sm.05, fill = "Drought Emergency")) + - scale_y_log10(limits = c(1, 1000)) + - geom_line(data = latest.years, aes(x = day.of.year, y = rollMean, color = "30-Day Mean"), size = 2) + - geom_vline(xintercept = 365) - -simple.plot -``` - -Next, we can play with various options to do a better job to imitate the style: - -```{r fig.cap="Detailed 30-day moving average daily flow plot", alt.text = "30-day moving average daily flow plot", message=FALSE, warning=FALSE} - -styled.plot <- simple.plot + - scale_x_continuous( - breaks = c(mid.month.days, 365 + mid.month.days), - labels = rep(month.letters, 2), - expand = c(0, 0), - limits = c(0, 730) - ) + - annotation_logticks(sides = "l") + - expand_limits(x = 0) + - annotate( - geom = "text", - x = c(182, 547), - y = 1, - label = c(current.year - 1, 
current.year), size = 4 - ) + - theme_bw() + - theme( - axis.ticks.x = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank() - ) + - labs(title = title.text, - y = "30-day moving ave", x = "" - ) + - scale_fill_manual( - name = "", breaks = label.text, - values = c("red", "orange", "yellow", "darkgreen") - ) + - scale_color_manual(name = "", values = "black") + - theme(legend.position = "bottom") - -styled.plot -``` - diff --git a/vignettes/statsServiceMap.Rmd b/vignettes/statsServiceMap.Rmd deleted file mode 100644 index 08b218bb6..000000000 --- a/vignettes/statsServiceMap.Rmd +++ /dev/null @@ -1,197 +0,0 @@ ---- -title: "Using the dataRetrieval Stats Service" -author: "David Watkins" -date: "2016-10-05" -output: - rmarkdown::html_vignette: - toc: true - fig_caption: yes - fig_height: 7 - fig_width: 7 -vignette: > - %\VignetteIndexEntry{Using the dataRetrieval Stats Service} - \usepackage[utf8]{inputenc} - %\VignetteEngine{knitr::rmarkdown} ---- - -# Introduction - -This script utilizes the new `dataRetrieval` package access to the [USGS Statistics Web Service](https://waterservices.usgs.gov/docs/statistics/). We will be pulling daily mean data using the daily value service in `readNWISdata`, and using the stats service data to put it in the context of the site's history. Here we are retrieving data for July 12th in the Upper Midwest, where a major storm system had recently passed through. You can modify this script to look at other areas and dates simply by modifying the `states` and `storm.date` objects. - -To run this code, we recommend having either `dataRetreival` version 2.5.13 (currently the latest release on CRAN) or version 2.6.1 (currently the latest Github release). - -# Get the data - -There are two separate `dataRetrieval` calls here — one to retrieve the daily discharge data, and one to retrieve the historical discharge statistics. 
Both calls are inside loops to split them into smaller pieces, to accomodate web service restrictions. The daily values service allows only single states as a filter, so we loop over the list of states. The stats service does not allow requests of more than ten sites, so the loop iterates by groups of ten site codes. Retrieving the data can take a few tens of seconds. Once we have both the daily value and statistics data, the two data frames are joined by site number via [dplyr's](https://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html) `left_join` function. We use a [pipe](https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html) to send the output of the join to `na.omit()` function. Then we add a column to the final data frame to hold the color value for each station. - -```{r getData, warning=FALSE, message=FALSE} -# example stats service map, comparing real-time current discharge to history for each site -# reusable for other state(s) -# David Watkins June 2016 - - -library(maps) -library(dplyr) -library(lubridate) -library(dataRetrieval) - -# pick state(s) and date -states <- c("WI", "MN", "ND", "SD", "IA") -storm.date <- "2016-07-12" - -# download each state individually -for (st in states) { - stDV <- renameNWISColumns(readNWISdata( - service = "dv", - parameterCd = "00060", - stateCd = st, - startDate = storm.date, - endDate = storm.date - )) - if (st != states[1]) { - storm.data <- full_join(storm.data, stDV) - sites <- full_join(sites, attr(stDV, "siteInfo")) - } else { - storm.data <- stDV - sites <- attr(stDV, "siteInfo") - } -} - -# retrieve stats data, dealing with 10 site limit to stat service requests -reqBks <- seq(1, nrow(sites), by = 10) -statData <- data.frame() -for (i in reqBks) { - getSites <- sites$site_no[i:(i + 9)] - currentSites <- readNWISstat( - siteNumbers = getSites, - parameterCd = "00060", - statReportType = "daily", - statType = c("p10", "p25", "p50", "p75", "p90", "mean") - ) - statData <- 
rbind(statData, currentSites) -} - -statData.storm <- statData[statData$month_nu == month(storm.date) & - statData$day_nu == day(storm.date), ] - -finalJoin <- left_join(storm.data, statData.storm) -finalJoin <- left_join(finalJoin, sites) - -finalJoin[, grep("_va", names(finalJoin))] <- sapply( - finalJoin[ - , - grep("_va", names(finalJoin)) - ], - function(x) as.numeric(x) -) - -# remove sites without current data -finalJoin <- finalJoin[!is.na(finalJoin$Flow), ] - - -# classify current discharge values -finalJoin$class <- NA - -finalJoin$class[finalJoin$Flow > finalJoin$p75_va] <- "navy" -finalJoin$class[finalJoin$Flow < finalJoin$p25_va] <- "red" - -finalJoin$class[finalJoin$Flow > finalJoin$p25_va & - finalJoin$Flow <= finalJoin$p50_va] <- "green" -finalJoin$class[finalJoin$Flow > finalJoin$p50_va & - finalJoin$Flow <= finalJoin$p75_va] <- "blue" - -finalJoin$class[is.na(finalJoin$class) & - finalJoin$Flow > finalJoin$p50_va] <- "cyan" -finalJoin$class[is.na(finalJoin$class) & - finalJoin$Flow < finalJoin$p50_va] <- "yellow" - -# take a look at the columns that we will plot later: -head(finalJoin[, c("dec_lon_va", "dec_lat_va", "class")]) -``` - -# Make the static plot - -The base map consists of two plots. The first makes the county lines with a gray background, and the second overlays the heavier state lines. After that we add the points for each stream gage, colored by the column we added to `finalJoin`. In the finishing details, `grconvertXY` is a handy function that converts your inputs from a normalized (0-1) coordinate system to the actual map coordinates, which allows the legend and scale to stay in the same relative location on different maps. 
- -```{r plot, fig.cap="Map discharge percentiles"} -# convert states from postal codes to full names -states <- stateCdLookup(states, outputType = "fullName") -par(pty = "s") -map("county", regions = states, fill = TRUE, col = "gray87", lwd = 0.5) -map("state", regions = states, fill = FALSE, lwd = 2, add = TRUE) -points(finalJoin$dec_lon_va, - finalJoin$dec_lat_va, - col = finalJoin$class, pch = 19 -) -title(paste("Daily discharge value percentile rank\n", storm.date), line = 1) -par(mar = c(5.1, 4.1, 4.1, 6), xpd = TRUE) - -legend.colors <- c( - "cyan", "yellow", - "red", - "green", "blue", - "navy" -) -legend.names <- c( - "Q > P50*", "Q < P50*", - "Q < P25", - "P25 < Q < P50", "P50 < Q < P75", - "Q > P75" -) - -legend("bottomleft", - inset = c(0.01, .01), - legend = legend.names, - pch = 19, cex = 0.75, pt.cex = 1.2, - col = legend.colors, - ncol = 2 -) -map.scale( - ratio = FALSE, cex = 0.75, - grconvertX(.07, "npc"), - grconvertY(.2, "npc") -) -text("*Other percentiles not available for these sites", - cex = 0.75, - x = grconvertX(0.2, "npc"), - y = grconvertY(-0.08, "npc") -) -``` - -# Make an interactive plot - -Static maps are great for papers and presentations. When possible, interactive maps allow the reader more flexibility to examine the data. The R leaflet package makes it easy to create useful interactive maps. - -```{r leaflet, fig.height=5} -library(leaflet) - -finalJoin$popup <- with(finalJoin, paste( - "", station_nm, - "
", - "Measured Flow:", Flow, - "ft3/s
", - "25% historical:", p25_va, - "ft3/s
", - "50% historical:", p50_va, - "ft3/s
", - "75% historical:", p75_va, - "ft3/s" -)) - -leafMapStat <- leaflet(data = finalJoin) %>% - addProviderTiles("CartoDB.Positron") %>% - addCircleMarkers(~dec_lon_va, ~dec_lat_va, - color = ~class, radius = 3, stroke = FALSE, - fillOpacity = 0.8, opacity = 0.8, - popup = ~popup - ) - -leafMapStat <- addLegend(leafMapStat, - position = "bottomleft", - colors = legend.colors, - labels = legend.names, - opacity = 0.8 -) - -leafMapStat -``` From 7b4a49b48527248edab9c5cf854b42a2f66d9a1f Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 09:43:55 -0500 Subject: [PATCH 09/29] Add reticulate to GHActions --- .github/workflows/pkgdown.yaml | 2 +- R/readNWISunit.R | 53 ++---- R/readWQPdata.R | 202 +++++++++++----------- R/readWQPqw.R | 26 +-- R/whatWQPdata.R | 38 ++-- R/whatWQPsites.R | 58 +++---- man/readNWISstat.Rd | 34 ++-- man/readNWISuv.Rd | 19 +- man/readWQPdata.Rd | 188 ++++++++++---------- man/readWQPqw.Rd | 26 +-- man/readWQPsummary.Rd | 44 ++--- man/read_waterdata_combined_meta.Rd | 12 +- man/read_waterdata_field_measurements.Rd | 10 +- man/read_waterdata_field_meta.Rd | 8 +- man/read_waterdata_monitoring_location.Rd | 2 +- man/read_waterdata_ts_meta.Rd | 14 +- man/whatWQPdata.Rd | 20 +-- man/wqpSpecials.Rd | 32 ++-- man/wqp_check_status.Rd | 12 +- 19 files changed, 387 insertions(+), 413 deletions(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index b5a0a8012..129542db0 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -51,7 +51,7 @@ jobs: any::ggplot2 any::sf any::patchwork - any::maps + any::reticulate any::leaflet any::readxl any::gridExtra diff --git a/R/readNWISunit.R b/R/readNWISunit.R index c5b4fbc62..ae117ea13 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -58,25 +58,10 @@ #' endDate <- "2014-10-10" #' \donttest{ #' -#' rawData <- readNWISuv(site_id, parameterCd, startDate, endDate) +#' #rawData <- readNWISuv(site_id, parameterCd, startDate, endDate) #' -#' 
rawData_today <- readNWISuv(site_id, parameterCd, Sys.Date(), Sys.Date()) +#' #rawData_today <- readNWISuv(site_id, parameterCd, Sys.Date(), Sys.Date()) #' -#' timeZoneChange <- readNWISuv( -#' c("04024430", "04024000"), parameterCd, -#' "2013-11-03", "2013-11-03" -#' ) -#' -#' centralTime <- readNWISuv(site_id, parameterCd, -#' "2014-10-10T12:00", "2014-10-10T23:59", -#' tz = "America/Chicago" -#' ) -#' -#' # Adding 'Z' to the time indicates to the web service to call the data with UTC time: -#' GMTdata <- readNWISuv( -#' site_id, parameterCd, -#' "2014-10-10T00:00Z", "2014-10-10T23:59Z" -#' ) #' } readNWISuv <- function( siteNumbers, @@ -352,28 +337,28 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { #' @export #' @examplesIf is_dataRetrieval_user() #' \donttest{ -#' x1 <- readNWISstat( -#' siteNumbers = c("02319394"), -#' parameterCd = c("00060"), -#' statReportType = "annual" -#' ) +#' # x1 <- readNWISstat( +#' # siteNumbers = c("02319394"), +#' # parameterCd = c("00060"), +#' # statReportType = "annual" +#' # ) #' #' # all the annual mean discharge data for two sites -#' x2 <- readNWISstat( -#' siteNumbers = c("02319394", "02171500"), -#' parameterCd = c("00010", "00060"), -#' statReportType = "annual" -#' ) +#' #x2 <- readNWISstat( +#' # siteNumbers = c("02319394", "02171500"), +#' # parameterCd = c("00010", "00060"), +#' # statReportType = "annual" +#' # ) #' #' # Request p25, p75, and mean values for temperature and discharge for the 2000s #' # Note that p25 and p75 were not available for temperature, and return NAs -#' x <- readNWISstat( -#' siteNumbers = c("02171500"), -#' parameterCd = c("00010", "00060"), -#' statReportType = "daily", -#' statType = c("mean", "median"), -#' startDate = "2000", endDate = "2010" -#' ) +#' #x <- readNWISstat( +#' # siteNumbers = c("02171500"), +#' # parameterCd = c("00010", "00060"), +#' # statReportType = "daily", +#' # statType = c("mean", "median"), +#' # startDate = "2000", endDate = 
"2010" +#' # ) #' } readNWISstat <- function( siteNumbers, diff --git a/R/readWQPdata.R b/R/readWQPdata.R index 92d70b67b..3f65c6263 100644 --- a/R/readWQPdata.R +++ b/R/readWQPdata.R @@ -86,47 +86,47 @@ #' \donttest{ #' #' # Legacy: -#' nameToUse <- "pH" -#' pHData <- readWQPdata(siteid = "USGS-04024315", -#' characteristicName = nameToUse) -#' ncol(pHData) -#' attr(pHData, "siteInfo") -#' attr(pHData, "queryTime") -#' attr(pHData, "url") +#' #nameToUse <- "pH" +#' #pHData <- readWQPdata(siteid = "USGS-04024315", +#' # characteristicName = nameToUse) +#' #ncol(pHData) +#' #attr(pHData, "siteInfo") +#' #attr(pHData, "queryTime") +#' #attr(pHData, "url") #' #' # WQX3: -#' pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", -#' characteristicName = nameToUse, -#' service = "ResultWQX3", -#' dataProfile = "basicPhysChem") -#' attr(pHData_wqx3, "url") +#' #pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", +#' # characteristicName = nameToUse, +#' # service = "ResultWQX3", +#' # dataProfile = "basicPhysChem") +#' # attr(pHData_wqx3, "url") #' #' # More examples: #' # querying by county -#' DeWitt <- readWQPdata( -#' statecode = "Illinois", -#' countycode = "DeWitt", -#' characteristicName = "Nitrogen" -#' ) +#' #DeWitt <- readWQPdata( +#' # statecode = "Illinois", +#' # countycode = "DeWitt", +#' # characteristicName = "Nitrogen" +#' # ) #' -#' attr(DeWitt, "url") +#' #attr(DeWitt, "url") #' -#' DeWitt_wqx3 <- readWQPdata( -#' statecode = "Illinois", -#' countycode = "DeWitt", -#' characteristicName = "Nitrogen", -#' service = "ResultWQX3", -#' dataProfile = "basicPhysChem", -#' ignore_attributes = TRUE) +#' #DeWitt_wqx3 <- readWQPdata( +#' # statecode = "Illinois", +#' # countycode = "DeWitt", +#' # characteristicName = "Nitrogen", +#' # service = "ResultWQX3", +#' # dataProfile = "basicPhysChem", +#' # ignore_attributes = TRUE) #' -#' attr(DeWitt_wqx3, "url") +#' #attr(DeWitt_wqx3, "url") #' #' # Data profile: "Sampling Activity" -#' activity <- readWQPdata( -#' 
siteid = "USGS-04024315", -#' service = "Activity" -#' ) -#' attr(activity, "url") +#' #activity <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # service = "Activity" +#' #) +#' #attr(activity, "url") #' #' # activity_wqx3 <- readWQPdata( #' # siteid = "USGS-04024315", @@ -134,14 +134,14 @@ #' # ) #' # attr(activity_wqx3, "url") #' -#' Dane_activity <- readWQPdata( -#' statecode = "Wisconsin", -#' countycode = "Dane", -#' startDateLo = "2023-01-01", -#' startDateHi = "2023-12-31", -#' service = "Activity" -#' ) -#' attr(Dane_activity, "url") +#' #Dane_activity <- readWQPdata( +#' # statecode = "Wisconsin", +#' # countycode = "Dane", +#' # startDateLo = "2023-01-01", +#' # startDateHi = "2023-12-31", +#' # service = "Activity" +#' # ) +#' #attr(Dane_activity, "url") #' #' # Dane_activity_wqx3 <- readWQPdata( #' # statecode = "Wisconsin", @@ -157,47 +157,47 @@ #' #' #' # Data profiles: "Organization Data" -#' org_data <- readWQPdata( -#' statecode = "WI", -#' countycode = "Dane", -#' service = "Organization" -#' ) +#' #org_data <- readWQPdata( +#' # statecode = "WI", +#' # countycode = "Dane", +#' # service = "Organization" +#' # ) #' #' # Data profiles: "Project Data" -#' project_data <- readWQPdata( -#' statecode = "WI", -#' countycode = "Dane", -#' service = "Project" -#' ) +#' #project_data <- readWQPdata( +#' # statecode = "WI", +#' # countycode = "Dane", +#' # service = "Project" +#' # ) #' #' # Data profiles: "Project Monitoring Location Weighting Data" -#' proj_mlwd <- readWQPdata( -#' statecode = "WI", -#' countycode = "Dane", -#' service = "ProjectMonitoringLocationWeighting" -#' ) +#' #proj_mlwd <- readWQPdata( +#' # statecode = "WI", +#' # countycode = "Dane", +#' # service = "ProjectMonitoringLocationWeighting" +#' # ) #' #' # Data profiles: "Sample Results (physical/chemical metadata)" -#' samp_data <- readWQPdata( -#' siteid = "USGS-04024315", -#' dataProfile = "resultPhysChem", -#' service = "Result" -#' ) +#' # samp_data <- readWQPdata( +#' # 
siteid = "USGS-04024315", +#' # dataProfile = "resultPhysChem", +#' # service = "Result" +#' # ) #' #' # Data profiles: "Sample Results (biological metadata)" -#' samp_bio <- readWQPdata( -#' siteid = "USGS-04024315", -#' dataProfile = "biological", -#' service = "Result" -#' ) +#' #samp_bio <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # dataProfile = "biological", +#' # service = "Result" +#' # ) #' #' #' # Data profiles: "Sample Results (narrow)" -#' samp_narrow <- readWQPdata( -#' siteid = "USGS-04024315", -#' service = "Result", -#' dataProfile = "narrowResult" -#' ) +#' # samp_narrow <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # service = "Result", +#' # dataProfile = "narrowResult" +#' # ) #' #' # samp_narrow_wqx3 <- readWQPdata( #' # siteid = "USGS-04024315", @@ -207,38 +207,38 @@ #' #' #' # Data profiles: "Sampling Activity" -#' samp_activity <- readWQPdata( -#' siteid = "USGS-04024315", -#' dataProfile = "activityAll", -#' service = "Activity" -#' ) +#' #samp_activity <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # dataProfile = "activityAll", +#' # service = "Activity" +#' # ) #' #' # Data profile: "Sampling Activity Metrics" -#' act_metrics <- readWQPdata( -#' statecode = "WI", -#' countycode = "Dane", -#' service = "ActivityMetric" -#' ) +#' # act_metrics <- readWQPdata( +#' # statecode = "WI", +#' # countycode = "Dane", +#' # service = "ActivityMetric" +#' # ) #' #' # Data profile: "Result Detection Quantitation Limit Data" -#' dl_data <- readWQPdata( -#' siteid = "USGS-04024315", -#' service = "ResultDetectionQuantitationLimit" -#' ) +#' # dl_data <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # service = "ResultDetectionQuantitationLimit" +#' # ) #' #' # other options: -#' Phosphorus <- readWQPdata( -#' statecode = "WI", countycode = "Dane", -#' characteristicName = "Phosphorus", -#' startDateLo = "2023-01-01", -#' ignore_attributes = TRUE, -#' convertType = FALSE -#' ) -#' -#' rawPHsites_legacy <- readWQPdata(siteid = 
c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), -#' characteristicName = "pH", -#' service = "Result", -#' dataProfile = "narrowResult" ) +#' # Phosphorus <- readWQPdata( +#' # statecode = "WI", countycode = "Dane", +#' # characteristicName = "Phosphorus", +#' # startDateLo = "2023-01-01", +#' # ignore_attributes = TRUE, +#' # convertType = FALSE +#' # ) +#' +#' #rawPHsites_legacy <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +#' # characteristicName = "pH", +#' # service = "Result", +#' # dataProfile = "narrowResult" ) #' #' # rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), #' # characteristicName = "pH", @@ -416,12 +416,12 @@ create_WQP_attributes <- function(retval, ...) { #' #' @examplesIf is_dataRetrieval_user() #' \donttest{ -#' rawPcode <- readWQPqw("USGS-01594440", "01075", -#' ignore_attributes = TRUE, legacy = FALSE) -#' headerInfo <- attr(rawPcode, "headerInfo") -#' wqp_request_id <- headerInfo$`wqp-request-id` -#' count_info <- wqp_check_status(wqp_request_id) -#' count_info[["dataProviders"]] +#' #rawPcode <- readWQPqw("USGS-01594440", "01075", +#' # ignore_attributes = TRUE, legacy = FALSE) +#' # headerInfo <- attr(rawPcode, "headerInfo") +#' #wqp_request_id <- headerInfo$`wqp-request-id` +#' #count_info <- wqp_check_status(wqp_request_id) +#' #count_info[["dataProviders"]] #' } wqp_check_status <- function(wqp_request_id) { id_url <- paste0(pkg.env[["status"]], wqp_request_id) diff --git a/R/readWQPqw.R b/R/readWQPqw.R index 0762b4418..92d5403b4 100644 --- a/R/readWQPqw.R +++ b/R/readWQPqw.R @@ -51,28 +51,28 @@ #' and [importWQP()] #' @examplesIf is_dataRetrieval_user() #' \donttest{ -#' rawPcode <- readWQPqw("USGS-01594440", "01075", "", "") +#' #rawPcode <- readWQPqw("USGS-01594440", "01075", "", "") #' -#' attr(rawPcode, "siteInfo") -#' attr(rawPcode, "queryTime") -#' attr(rawPcode, "url") +#' #attr(rawPcode, "siteInfo") +#' #attr(rawPcode, "queryTime") +#' 
#attr(rawPcode, "url") #' -#' rawCharacteristicName <- readWQPqw("WIDNR_WQX-10032762", "Specific conductance", "", "") -#' pHsites_legacy <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), -#' "pH", "", "") -#' ncol(pHsites_legacy) -#' attr(pHsites_legacy, "url") +#' #rawCharacteristicName <- readWQPqw("WIDNR_WQX-10032762", "Specific conductance", "", "") +#' #pHsites_legacy <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +#' # "pH", "", "") +#' #ncol(pHsites_legacy) +#' #attr(pHsites_legacy, "url") #' #' # pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), #' # "pH", "", "", legacy = FALSE) #' # ncol(pHsites_modern) #' # attr(pHsites_modern, "url") #' -#' nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") +#' # nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") #' -#' DO <- readWQPqw(siteNumbers = "USGS-05288705", -#' parameterCd = "00300", -#' convertType = FALSE) +#' # DO <- readWQPqw(siteNumbers = "USGS-05288705", +#' # parameterCd = "00300", +#' # convertType = FALSE) #' } readWQPqw <- function( siteNumbers, diff --git a/R/whatWQPdata.R b/R/whatWQPdata.R index 48e576030..98e1e9988 100644 --- a/R/whatWQPdata.R +++ b/R/whatWQPdata.R @@ -6,14 +6,14 @@ #' @examples #' \donttest{ #' -#' site1 <- whatWQPsamples(siteid = "USGS-01594440") +#' #site1 <- whatWQPsamples(siteid = "USGS-01594440") #' -#' type <- "Stream" +#' #type <- "Stream" #' -#' sites <- whatWQPsamples(countycode = "US:55:025", siteType = type) +#' #sites <- whatWQPsamples(countycode = "US:55:025", siteType = type) #' -#' lakeSites_samples <- whatWQPsamples(siteType = "Lake, Reservoir, Impoundment", -#' countycode = "US:55:025") +#' #lakeSites_samples <- whatWQPsamples(siteType = "Lake, Reservoir, Impoundment", +#' # countycode = "US:55:025") #' } whatWQPsamples <- function(..., convertType = TRUE, legacy = TRUE) { values <- readWQPdots(..., 
legacy = legacy) @@ -81,11 +81,11 @@ whatWQPsamples <- function(..., convertType = TRUE, legacy = TRUE) { #' @examples #' \donttest{ #' -#' type <- "Stream" +#' #type <- "Stream" #' -#' sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type) -#' lakeSites_metrics <- whatWQPmetrics(siteType = "Lake, Reservoir, Impoundment", -#' countycode = "US:55:025") +#' #sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type) +#' #lakeSites_metrics <- whatWQPmetrics(siteType = "Lake, Reservoir, Impoundment", +#' # countycode = "US:55:025") #' } whatWQPmetrics <- function(..., convertType = TRUE) { values <- readWQPdots(..., legacy = TRUE) @@ -182,20 +182,20 @@ whatWQPmetrics <- function(..., convertType = TRUE) { #' @seealso whatWQPsites readWQPsummary readWQPdata #' @examplesIf is_dataRetrieval_user() #' \donttest{ -#' site1 <- whatWQPdata(siteid = "USGS-01594440") +#' #site1 <- whatWQPdata(siteid = "USGS-01594440") #' -#' type <- "Stream" -#' sites <- whatWQPdata(countycode = "US:55:025", siteType = type) +#' #type <- "Stream" +#' #sites <- whatWQPdata(countycode = "US:55:025", siteType = type) #' -#' lakeSites <- whatWQPdata(siteType = "Lake, Reservoir, Impoundment", -#' countycode = "US:55:025") -#' lakeSites_chars <- whatWQPdata( -#' siteType = "Lake, Reservoir, Impoundment", -#' countycode = "US:55:025", convertType = FALSE) +#' #lakeSites <- whatWQPdata(siteType = "Lake, Reservoir, Impoundment", +#' # countycode = "US:55:025") +#' #lakeSites_chars <- whatWQPdata( +#' # siteType = "Lake, Reservoir, Impoundment", +#' # countycode = "US:55:025", convertType = FALSE) #' #' -#' bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) -#' what_bb <- whatWQPdata(bBox = bbox) +#' #bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) +#' #what_bb <- whatWQPdata(bBox = bbox) #' } whatWQPdata <- function(..., convertType = TRUE) { args <- convertLists(...) 
diff --git a/R/whatWQPsites.R b/R/whatWQPsites.R index f18677309..08a2fde40 100644 --- a/R/whatWQPsites.R +++ b/R/whatWQPsites.R @@ -36,14 +36,14 @@ #' @examplesIf is_dataRetrieval_user() #' \donttest{ #' -#' site1 <- whatWQPsites(siteid = "USGS-01594440") +#' #site1 <- whatWQPsites(siteid = "USGS-01594440") #' -#' type <- "Stream" -#' sites <- whatWQPsites( -#' countycode = "US:55:025", -#' characteristicName = "Phosphorus", -#' siteType = type -#' ) +#' #type <- "Stream" +#' #sites <- whatWQPsites( +#' # countycode = "US:55:025", +#' # characteristicName = "Phosphorus", +#' # siteType = type +#' #) #' } whatWQPsites <- function(..., legacy = TRUE, convertType = TRUE) { values <- readWQPdots(..., legacy = legacy) @@ -118,36 +118,36 @@ whatWQPsites <- function(..., legacy = TRUE, convertType = TRUE) { #' @examplesIf is_dataRetrieval_user() #' \donttest{ #' # Summary of a single site for the last 5 years: -#' site_5 <- readWQPsummary( -#' siteid = "USGS-07144100", -#' summaryYears = 5 -#' ) +#' #site_5 <- readWQPsummary( +#' # siteid = "USGS-07144100", +#' # summaryYears = 5 +#'# ) #' #' # Summary of a single site for the full period of record: -#' site_all <- readWQPsummary( -#' siteid = "USGS-07144100", -#' summaryYears = "all" -#' ) +#' #site_all <- readWQPsummary( +#' # siteid = "USGS-07144100", +#' # summaryYears = "all" +#' # ) #' #' # Summary of the data available from streams in a single county: -#' dane_county_data <- readWQPsummary( -#' countycode = "US:55:025", -#' summaryYears = 5, -#' siteType = "Stream" -#' ) +#' # dane_county_data <- readWQPsummary( +#' # countycode = "US:55:025", +#' # summaryYears = 5, +#' # siteType = "Stream" +#' # ) #' #' # Summary of the data all available from lakes in a single county: -#' lake_sites <- readWQPsummary( -#' siteType = "Lake, Reservoir, Impoundment", -#' countycode = "US:55:025" -#' ) +#' # lake_sites <- readWQPsummary( +#' # siteType = "Lake, Reservoir, Impoundment", +#' # countycode = "US:55:025" +#' # ) #' #' 
# Summary of the data available for the last 5 years in New Jersey: -#' state1 <- readWQPsummary( -#' statecode = "NJ", -#' summaryYears = 5, -#' siteType = "Stream" -#' ) +#' #state1 <- readWQPsummary( +#' # statecode = "NJ", +#' # summaryYears = 5, +#' # siteType = "Stream" +#' # ) #' } readWQPsummary <- function(...) { wqp_message() diff --git a/man/readNWISstat.Rd b/man/readNWISstat.Rd index e2d9907cf..ab394f0a0 100644 --- a/man/readNWISstat.Rd +++ b/man/readNWISstat.Rd @@ -62,28 +62,28 @@ Retrieves site statistics from the USGS Statistics Web Service beta. \examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ -x1 <- readNWISstat( - siteNumbers = c("02319394"), - parameterCd = c("00060"), - statReportType = "annual" -) +# x1 <- readNWISstat( +# siteNumbers = c("02319394"), +# parameterCd = c("00060"), +# statReportType = "annual" +# ) # all the annual mean discharge data for two sites -x2 <- readNWISstat( - siteNumbers = c("02319394", "02171500"), - parameterCd = c("00010", "00060"), - statReportType = "annual" -) +#x2 <- readNWISstat( +# siteNumbers = c("02319394", "02171500"), +# parameterCd = c("00010", "00060"), +# statReportType = "annual" +# ) # Request p25, p75, and mean values for temperature and discharge for the 2000s # Note that p25 and p75 were not available for temperature, and return NAs -x <- readNWISstat( - siteNumbers = c("02171500"), - parameterCd = c("00010", "00060"), - statReportType = "daily", - statType = c("mean", "median"), - startDate = "2000", endDate = "2010" -) +#x <- readNWISstat( +# siteNumbers = c("02171500"), +# parameterCd = c("00010", "00060"), +# statReportType = "daily", +# statType = c("mean", "median"), +# startDate = "2000", endDate = "2010" +# ) } \dontshow{\}) # examplesIf} } diff --git a/man/readNWISuv.Rd b/man/readNWISuv.Rd index f2bf779c7..2878063f3 100644 --- a/man/readNWISuv.Rd +++ b/man/readNWISuv.Rd @@ -69,25 +69,10 @@ startDate <- "2014-10-10" endDate <- "2014-10-10" 
\donttest{ -rawData <- readNWISuv(site_id, parameterCd, startDate, endDate) +#rawData <- readNWISuv(site_id, parameterCd, startDate, endDate) -rawData_today <- readNWISuv(site_id, parameterCd, Sys.Date(), Sys.Date()) +#rawData_today <- readNWISuv(site_id, parameterCd, Sys.Date(), Sys.Date()) -timeZoneChange <- readNWISuv( - c("04024430", "04024000"), parameterCd, - "2013-11-03", "2013-11-03" -) - -centralTime <- readNWISuv(site_id, parameterCd, - "2014-10-10T12:00", "2014-10-10T23:59", - tz = "America/Chicago" -) - -# Adding 'Z' to the time indicates to the web service to call the data with UTC time: -GMTdata <- readNWISuv( - site_id, parameterCd, - "2014-10-10T00:00Z", "2014-10-10T23:59Z" -) } \dontshow{\}) # examplesIf} } diff --git a/man/readWQPdata.Rd b/man/readWQPdata.Rd index 68680d59c..a2a915b44 100644 --- a/man/readWQPdata.Rd +++ b/man/readWQPdata.Rd @@ -105,47 +105,47 @@ Sampling Activity \tab ActivityWQX3 \tab /wqx3/Activity/search \cr \donttest{ # Legacy: -nameToUse <- "pH" -pHData <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse) -ncol(pHData) -attr(pHData, "siteInfo") -attr(pHData, "queryTime") -attr(pHData, "url") +#nameToUse <- "pH" +#pHData <- readWQPdata(siteid = "USGS-04024315", +# characteristicName = nameToUse) +#ncol(pHData) +#attr(pHData, "siteInfo") +#attr(pHData, "queryTime") +#attr(pHData, "url") # WQX3: -pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse, - service = "ResultWQX3", - dataProfile = "basicPhysChem") -attr(pHData_wqx3, "url") +#pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", +# characteristicName = nameToUse, +# service = "ResultWQX3", +# dataProfile = "basicPhysChem") +# attr(pHData_wqx3, "url") # More examples: # querying by county -DeWitt <- readWQPdata( - statecode = "Illinois", - countycode = "DeWitt", - characteristicName = "Nitrogen" -) +#DeWitt <- readWQPdata( +# statecode = "Illinois", +# countycode = "DeWitt", +# characteristicName = "Nitrogen" +# ) 
-attr(DeWitt, "url") +#attr(DeWitt, "url") -DeWitt_wqx3 <- readWQPdata( - statecode = "Illinois", - countycode = "DeWitt", - characteristicName = "Nitrogen", - service = "ResultWQX3", - dataProfile = "basicPhysChem", - ignore_attributes = TRUE) +#DeWitt_wqx3 <- readWQPdata( +# statecode = "Illinois", +# countycode = "DeWitt", +# characteristicName = "Nitrogen", +# service = "ResultWQX3", +# dataProfile = "basicPhysChem", +# ignore_attributes = TRUE) -attr(DeWitt_wqx3, "url") +#attr(DeWitt_wqx3, "url") # Data profile: "Sampling Activity" -activity <- readWQPdata( - siteid = "USGS-04024315", - service = "Activity" -) -attr(activity, "url") +#activity <- readWQPdata( +# siteid = "USGS-04024315", +# service = "Activity" +#) +#attr(activity, "url") # activity_wqx3 <- readWQPdata( # siteid = "USGS-04024315", @@ -153,14 +153,14 @@ attr(activity, "url") # ) # attr(activity_wqx3, "url") -Dane_activity <- readWQPdata( - statecode = "Wisconsin", - countycode = "Dane", - startDateLo = "2023-01-01", - startDateHi = "2023-12-31", - service = "Activity" -) -attr(Dane_activity, "url") +#Dane_activity <- readWQPdata( +# statecode = "Wisconsin", +# countycode = "Dane", +# startDateLo = "2023-01-01", +# startDateHi = "2023-12-31", +# service = "Activity" +# ) +#attr(Dane_activity, "url") # Dane_activity_wqx3 <- readWQPdata( # statecode = "Wisconsin", @@ -176,47 +176,47 @@ attr(Dane_activity, "url") # Data profiles: "Organization Data" -org_data <- readWQPdata( - statecode = "WI", - countycode = "Dane", - service = "Organization" -) +#org_data <- readWQPdata( +# statecode = "WI", +# countycode = "Dane", +# service = "Organization" +# ) # Data profiles: "Project Data" -project_data <- readWQPdata( - statecode = "WI", - countycode = "Dane", - service = "Project" -) +#project_data <- readWQPdata( +# statecode = "WI", +# countycode = "Dane", +# service = "Project" +# ) # Data profiles: "Project Monitoring Location Weighting Data" -proj_mlwd <- readWQPdata( - statecode = "WI", - countycode 
= "Dane", - service = "ProjectMonitoringLocationWeighting" -) +#proj_mlwd <- readWQPdata( +# statecode = "WI", +# countycode = "Dane", +# service = "ProjectMonitoringLocationWeighting" +# ) # Data profiles: "Sample Results (physical/chemical metadata)" -samp_data <- readWQPdata( - siteid = "USGS-04024315", - dataProfile = "resultPhysChem", - service = "Result" -) +# samp_data <- readWQPdata( +# siteid = "USGS-04024315", +# dataProfile = "resultPhysChem", +# service = "Result" +# ) # Data profiles: "Sample Results (biological metadata)" -samp_bio <- readWQPdata( - siteid = "USGS-04024315", - dataProfile = "biological", - service = "Result" -) +#samp_bio <- readWQPdata( +# siteid = "USGS-04024315", +# dataProfile = "biological", +# service = "Result" +# ) # Data profiles: "Sample Results (narrow)" -samp_narrow <- readWQPdata( - siteid = "USGS-04024315", - service = "Result", - dataProfile = "narrowResult" -) +# samp_narrow <- readWQPdata( +# siteid = "USGS-04024315", +# service = "Result", +# dataProfile = "narrowResult" +# ) # samp_narrow_wqx3 <- readWQPdata( # siteid = "USGS-04024315", @@ -226,38 +226,38 @@ samp_narrow <- readWQPdata( # Data profiles: "Sampling Activity" -samp_activity <- readWQPdata( - siteid = "USGS-04024315", - dataProfile = "activityAll", - service = "Activity" -) +#samp_activity <- readWQPdata( +# siteid = "USGS-04024315", +# dataProfile = "activityAll", +# service = "Activity" +# ) # Data profile: "Sampling Activity Metrics" -act_metrics <- readWQPdata( - statecode = "WI", - countycode = "Dane", - service = "ActivityMetric" -) +# act_metrics <- readWQPdata( +# statecode = "WI", +# countycode = "Dane", +# service = "ActivityMetric" +# ) # Data profile: "Result Detection Quantitation Limit Data" -dl_data <- readWQPdata( - siteid = "USGS-04024315", - service = "ResultDetectionQuantitationLimit" -) +# dl_data <- readWQPdata( +# siteid = "USGS-04024315", +# service = "ResultDetectionQuantitationLimit" +# ) # other options: -Phosphorus <- 
readWQPdata( - statecode = "WI", countycode = "Dane", - characteristicName = "Phosphorus", - startDateLo = "2023-01-01", - ignore_attributes = TRUE, - convertType = FALSE -) +# Phosphorus <- readWQPdata( +# statecode = "WI", countycode = "Dane", +# characteristicName = "Phosphorus", +# startDateLo = "2023-01-01", +# ignore_attributes = TRUE, +# convertType = FALSE +# ) -rawPHsites_legacy <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), - characteristicName = "pH", - service = "Result", - dataProfile = "narrowResult" ) +#rawPHsites_legacy <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +# characteristicName = "pH", +# service = "Result", +# dataProfile = "narrowResult" ) # rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), # characteristicName = "pH", diff --git a/man/readWQPqw.Rd b/man/readWQPqw.Rd index 5049d688e..e0fbd54d4 100644 --- a/man/readWQPqw.Rd +++ b/man/readWQPqw.Rd @@ -77,28 +77,28 @@ site name, such as 'USGS-01234567'. 
\examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ -rawPcode <- readWQPqw("USGS-01594440", "01075", "", "") +#rawPcode <- readWQPqw("USGS-01594440", "01075", "", "") -attr(rawPcode, "siteInfo") -attr(rawPcode, "queryTime") -attr(rawPcode, "url") +#attr(rawPcode, "siteInfo") +#attr(rawPcode, "queryTime") +#attr(rawPcode, "url") -rawCharacteristicName <- readWQPqw("WIDNR_WQX-10032762", "Specific conductance", "", "") -pHsites_legacy <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), - "pH", "", "") -ncol(pHsites_legacy) -attr(pHsites_legacy, "url") +#rawCharacteristicName <- readWQPqw("WIDNR_WQX-10032762", "Specific conductance", "", "") +#pHsites_legacy <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +# "pH", "", "") +#ncol(pHsites_legacy) +#attr(pHsites_legacy, "url") # pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), # "pH", "", "", legacy = FALSE) # ncol(pHsites_modern) # attr(pHsites_modern, "url") -nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") +# nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") -DO <- readWQPqw(siteNumbers = "USGS-05288705", - parameterCd = "00300", - convertType = FALSE) +# DO <- readWQPqw(siteNumbers = "USGS-05288705", +# parameterCd = "00300", +# convertType = FALSE) } \dontshow{\}) # examplesIf} } diff --git a/man/readWQPsummary.Rd b/man/readWQPsummary.Rd index 9dfd922d0..a939331ee 100644 --- a/man/readWQPsummary.Rd +++ b/man/readWQPsummary.Rd @@ -36,36 +36,36 @@ available data at the WQP sites, and some metadata on the sites themselves. 
\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ # Summary of a single site for the last 5 years: -site_5 <- readWQPsummary( - siteid = "USGS-07144100", - summaryYears = 5 -) +#site_5 <- readWQPsummary( +# siteid = "USGS-07144100", +# summaryYears = 5 +# ) # Summary of a single site for the full period of record: -site_all <- readWQPsummary( - siteid = "USGS-07144100", - summaryYears = "all" -) +#site_all <- readWQPsummary( +# siteid = "USGS-07144100", +# summaryYears = "all" +# ) # Summary of the data available from streams in a single county: -dane_county_data <- readWQPsummary( - countycode = "US:55:025", - summaryYears = 5, - siteType = "Stream" -) +# dane_county_data <- readWQPsummary( +# countycode = "US:55:025", +# summaryYears = 5, +# siteType = "Stream" +# ) # Summary of the data all available from lakes in a single county: -lake_sites <- readWQPsummary( - siteType = "Lake, Reservoir, Impoundment", - countycode = "US:55:025" -) +# lake_sites <- readWQPsummary( +# siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025" +# ) # Summary of the data available for the last 5 years in New Jersey: -state1 <- readWQPsummary( - statecode = "NJ", - summaryYears = 5, - siteType = "Stream" -) +#state1 <- readWQPsummary( +# statecode = "NJ", +# summaryYears = 5, +# siteType = "Stream" +# ) } \dontshow{\}) # examplesIf} } diff --git a/man/read_waterdata_combined_meta.Rd b/man/read_waterdata_combined_meta.Rd index f5f2e4de1..5ce5b3193 100644 --- a/man/read_waterdata_combined_meta.Rd +++ b/man/read_waterdata_combined_meta.Rd @@ -91,7 +91,7 @@ Multiple statistic_ids can be requested as a character vector.} \item{data_type}{The computational period type of data collected at the monitoring location.} -\item{computation_identifier}{Indicates whether the data from this time series represent a specific statistical computation. +\item{computation_identifier}{Indicates the computation performed to calculate this time series. 
Values of "Instantaneous" reflect point measurements. Multiple computation_identifiers can be requested as a character vector.} \item{computation_period_identifier}{Multiple computation_period_identifiers can be requested as a character vector.} @@ -108,11 +108,11 @@ for timely best science and to assist with daily operations which need real-time information. Non-primary time series data are only retained by this system for 120 days.} -\item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.} +\item{web_description}{An optional description of the time series. WDFN and other USGS data dissemination products use this field, in combination with sublocation_identifier, to distinguish the differences between multiple time series for the same parameter code, statistic code, and monitoring location.} \item{parent_time_series_id}{The unique identifier representing the parent or "upchain" time series that a daily values time series is generated from. Daily values time series have one and only one parent time series.} -\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). 
Examples: \itemize{ @@ -122,11 +122,11 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. +Only features that have a \code{begin_utc} that intersects the value of datetime are selected. See also Details below for more information.} -\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". +\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -136,7 +136,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. 
See also Details below for more information.} diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index 508d2c9cd..8242d19e6 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -41,7 +41,7 @@ Multiple parameter_codes can be requested as a character vector.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, field_measurement_id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified, control_condition, measurement_rated. +geometry, field_measurement_id, field_measurements_series_id, field_visit_id, parameter_code, monitoring_location_id, observing_procedure_code, observing_procedure, value, unit_of_measure, time, qualifier, vertical_datum, approval_status, measuring_agency, last_modified, control_condition, measurement_rated. The default (\code{NA}) will return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} @@ -74,9 +74,13 @@ See also Details below for more information.} \item{measuring_agency}{The agency performing the measurement.} -\item{control_condition}{What and where the control of flow is for the gage pool.} +\item{control_condition}{The state of the control feature at the time of observation. -\item{measurement_rated}{Rated measurement based on the hydrologic/hydraulic conditions in which the measurement was made +What and where the control of flow is for the gage pool.} + +\item{measurement_rated}{A qualitative estimate of the quality of a measurement. 
+ +Rated measurement based on the hydrologic/hydraulic conditions in which the measurement was made (excellent (2 percent), good (5 percent), fair (8 percent), or poor (more than 8 percent). percent)} \item{skipGeometry}{This option can be used to skip response geometries for diff --git a/man/read_waterdata_field_meta.Rd b/man/read_waterdata_field_meta.Rd index 56b9af00b..83d6bc752 100644 --- a/man/read_waterdata_field_meta.Rd +++ b/man/read_waterdata_field_meta.Rd @@ -36,7 +36,7 @@ Multiple parameter_names can be requested as a character vector.} Multiple parameter_descriptions can be requested as a character vector.} -\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -46,11 +46,11 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. +Only features that have a \code{begin_utc} that intersects the value of datetime are selected. See also Details below for more information.} -\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. 
Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". +\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -60,7 +60,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. See also Details below for more information.} diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index b8e87b7d0..178cad7a9 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -52,7 +52,7 @@ read_waterdata_monitoring_location( ) } \arguments{ -\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. 
This corresponds to the \code{monitoring_location_id} field in other endpoints. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). Multiple monitoring_location_ids can be requested as a character vector.} diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd index 181b237f0..7a878d240 100644 --- a/man/read_waterdata_ts_meta.Rd +++ b/man/read_waterdata_ts_meta.Rd @@ -68,7 +68,7 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{begin_utc}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. +\item{begin_utc}{The datetime of the earliest observation in the time series. Together with \code{end_utc}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -78,12 +78,12 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{begin} that intersects the value of datetime are selected. +Only features that have a \code{begin_utc} that intersects the value of datetime are selected. #' See also Details below for more information.} -\item{end_utc}{The datetime of the most recent observation in the time series. 
Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". +\item{end_utc}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end_utc} value reflects. Together with \code{begin_utc}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: \itemize{ @@ -93,7 +93,7 @@ Examples: \item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours } -Only features that have a \code{end} that intersects the value of datetime are selected. +Only features that have a \code{end_utc} that intersects the value of datetime are selected. See also Details below for more information.} @@ -106,7 +106,7 @@ See also Details below for more information.} \item{computation_period_identifier}{Indicates the period of data used for any statistical computations. Multiple computation_period_identifiers can be requested as a character vector.} -\item{computation_identifier}{Indicates whether the data from this time series represent a specific statistical computation. +\item{computation_identifier}{Indicates the computation performed to calculate this time series. Values of "Instantaneous" reflect point measurements. 
Multiple computation_identifiers can be requested as a character vector.} \item{thresholds}{Thresholds represent known numeric limits for a time series, for example the historic maximum value for a parameter or a level below which a sensor is non-operative. These thresholds are sometimes used to automatically determine if an observation is erroneous due to sensor error, and therefore shouldn't be included in the time series.} @@ -123,9 +123,9 @@ this system for 120 days.} \item{parent_time_series_id}{The unique identifier representing the parent or "upchain" time series that a daily values time series is generated from. Daily values time series have one and only one parent time series.} -\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the "time_series_id" field in other endpoints.} -\item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.} +\item{web_description}{An optional description of the time series. WDFN and other USGS data dissemination products use this field, in combination with sublocation_identifier, to distinguish the differences between multiple time series for the same parameter code, statistic code, and monitoring location.} \item{skipGeometry}{This option can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial diff --git a/man/whatWQPdata.Rd b/man/whatWQPdata.Rd index 8b02c97fb..b16183d06 100644 --- a/man/whatWQPdata.Rd +++ b/man/whatWQPdata.Rd @@ -46,20 +46,20 @@ kind of data discovery see \code{readWQPsummary}. 
\examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ -site1 <- whatWQPdata(siteid = "USGS-01594440") +#site1 <- whatWQPdata(siteid = "USGS-01594440") -type <- "Stream" -sites <- whatWQPdata(countycode = "US:55:025", siteType = type) +#type <- "Stream" +#sites <- whatWQPdata(countycode = "US:55:025", siteType = type) -lakeSites <- whatWQPdata(siteType = "Lake, Reservoir, Impoundment", - countycode = "US:55:025") -lakeSites_chars <- whatWQPdata( - siteType = "Lake, Reservoir, Impoundment", - countycode = "US:55:025", convertType = FALSE) +#lakeSites <- whatWQPdata(siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025") +#lakeSites_chars <- whatWQPdata( +# siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025", convertType = FALSE) -bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) -what_bb <- whatWQPdata(bBox = bbox) +#bbox <- c(-86.9736, 34.4883, -86.6135, 34.6562) +#what_bb <- whatWQPdata(bBox = bbox) } \dontshow{\}) # examplesIf} } diff --git a/man/wqpSpecials.Rd b/man/wqpSpecials.Rd index 233435505..a0d0fd848 100644 --- a/man/wqpSpecials.Rd +++ b/man/wqpSpecials.Rd @@ -51,34 +51,34 @@ not return information on what data was collected at the site. 
\examples{ \donttest{ -site1 <- whatWQPsamples(siteid = "USGS-01594440") +#site1 <- whatWQPsamples(siteid = "USGS-01594440") -type <- "Stream" +#type <- "Stream" -sites <- whatWQPsamples(countycode = "US:55:025", siteType = type) +#sites <- whatWQPsamples(countycode = "US:55:025", siteType = type) -lakeSites_samples <- whatWQPsamples(siteType = "Lake, Reservoir, Impoundment", - countycode = "US:55:025") +#lakeSites_samples <- whatWQPsamples(siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025") } \donttest{ -type <- "Stream" +#type <- "Stream" -sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type) -lakeSites_metrics <- whatWQPmetrics(siteType = "Lake, Reservoir, Impoundment", - countycode = "US:55:025") +#sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type) +#lakeSites_metrics <- whatWQPmetrics(siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025") } \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ -site1 <- whatWQPsites(siteid = "USGS-01594440") +#site1 <- whatWQPsites(siteid = "USGS-01594440") -type <- "Stream" -sites <- whatWQPsites( - countycode = "US:55:025", - characteristicName = "Phosphorus", - siteType = type -) +#type <- "Stream" +#sites <- whatWQPsites( +# countycode = "US:55:025", +# characteristicName = "Phosphorus", +# siteType = type +#) } \dontshow{\}) # examplesIf} } diff --git a/man/wqp_check_status.Rd b/man/wqp_check_status.Rd index cccd26900..492bc80a1 100644 --- a/man/wqp_check_status.Rd +++ b/man/wqp_check_status.Rd @@ -23,12 +23,12 @@ function will be attached as an attribute to the data. 
\examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} \donttest{ -rawPcode <- readWQPqw("USGS-01594440", "01075", - ignore_attributes = TRUE, legacy = FALSE) -headerInfo <- attr(rawPcode, "headerInfo") -wqp_request_id <- headerInfo$`wqp-request-id` -count_info <- wqp_check_status(wqp_request_id) -count_info[["dataProviders"]] +#rawPcode <- readWQPqw("USGS-01594440", "01075", +# ignore_attributes = TRUE, legacy = FALSE) +# headerInfo <- attr(rawPcode, "headerInfo") +#wqp_request_id <- headerInfo$`wqp-request-id` +#count_info <- wqp_check_status(wqp_request_id) +#count_info[["dataProviders"]] } \dontshow{\}) # examplesIf} } From 6d6a8f01074440a58d7ab4fd26368cc9cfbb0767 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 30 Apr 2026 10:29:57 -0500 Subject: [PATCH 10/29] Switch to samples in vignette --- tests/testthat/tests_general.R | 166 +++++++++++------------ tests/testthat/tests_userFriendly_fxns.R | 68 +++++----- vignettes/join_by_closest.Rmd | 20 +-- 3 files changed, 129 insertions(+), 125 deletions(-) diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index ce207f440..43ff223bc 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -295,16 +295,16 @@ test_that("read_waterdata_ts_meta", { test_that("General WQP retrievals working", { testthat::skip_on_cran() testthat::skip_on_ci() - nameToUse <- "pH" - pHData <- readWQPdata( - siteid = "USGS-04024315", - characteristicName = nameToUse, - service = "ResultWQX3" - ) - expect_is(pHData$Activity_StartDateTime, "POSIXct") - expect_type(pHData$USGSpcode, "character") - expect_type(pHData$Result_Measure, "double") - expect_type(pHData$SampleCollectionMethod_Identifier, "character") + # nameToUse <- "pH" + # pHData <- readWQPdata( + # siteid = "USGS-04024315", + # characteristicName = nameToUse, + # service = "ResultWQX3" + # ) + # expect_is(pHData$Activity_StartDateTime, "POSIXct") + # expect_type(pHData$USGSpcode, "character") + # 
expect_type(pHData$Result_Measure, "double") + # expect_type(pHData$SampleCollectionMethod_Identifier, "character") # # # testing lists: startDate <- as.Date("2022-01-01") @@ -335,80 +335,80 @@ test_that("General WQP retrievals working", { characteristicName = secchi.names ) - wqp.summary_no_atts <- readWQPdata( - siteid = "USGS-04024315", - characteristicName = nameToUse, - ignore_attributes = TRUE, - service = "ResultWQX3" - ) - expect_true( - !all( - c("siteInfo", "variableInfo") %in% names(attributes(wqp.summary_no_atts)) - ) - ) - - rawPcode <- readWQPqw("USGS-01594440", "01075", "", "", legacy = FALSE) - expect_true(all( - c("url", "queryTime", "siteInfo") %in% - names(attributes(rawPcode)) - )) - - # This means wqp_check_status was called: - expect_true("dataProviders" %in% names(attr(rawPcode, "headerInfo"))) - - rawPcode2 <- readWQPqw( - "USGS-01594440", - "01075", - "", - "", - ignore_attributes = TRUE - ) - expect_true(all( - !c("queryTime", "siteInfo") %in% - names(attributes(rawPcode2)) - )) - - # This means wqp_check_status wasn't called: - expect_false("dataProviders" %in% names(attr(rawPcode2, "headerInfo"))) - - pHData <- readWQPdata( - siteid = "USGS-04024315", - characteristicName = "pH", - service = "ResultWQX3" - ) - expect_true(all( - c("url", "queryTime", "siteInfo", "headerInfo") %in% - names(attributes(pHData)) - )) - - # # This means wqp_check_status was called: - expect_true("dataProviders" %in% names(attr(pHData, "headerInfo"))) - - pHData2 <- readWQPdata( - siteid = "USGS-04024315", - characteristicName = "pH", - ignore_attributes = TRUE, - service = "ResultWQX3" - ) - expect_true(all( - !c("queryTime", "siteInfo") %in% - names(attributes(pHData2)) - )) - + # wqp.summary_no_atts <- readWQPdata( + # siteid = "USGS-04024315", + # characteristicName = nameToUse, + # ignore_attributes = TRUE, + # service = "ResultWQX3" + # ) + # expect_true( + # !all( + # c("siteInfo", "variableInfo") %in% names(attributes(wqp.summary_no_atts)) + # ) + # ) 
+ # + # rawPcode <- readWQPqw("USGS-01594440", "01075", "", "", legacy = FALSE) + # expect_true(all( + # c("url", "queryTime", "siteInfo") %in% + # names(attributes(rawPcode)) + # )) + # # # This means wqp_check_status was called: - expect_false("dataProviders" %in% names(attr(pHData2, "headerInfo"))) - - rawPcode <- readWQPqw( - "USGS-01594440", - "01075", - ignore_attributes = TRUE, - legacy = FALSE - ) - headerInfo <- attr(rawPcode, "headerInfo") - wqp_request_id <- headerInfo$`wqp-request-id` - count_info <- wqp_check_status(wqp_request_id) - - expect_true("dataProviders" %in% names(count_info)) + # expect_true("dataProviders" %in% names(attr(rawPcode, "headerInfo"))) + # + # rawPcode2 <- readWQPqw( + # "USGS-01594440", + # "01075", + # "", + # "", + # ignore_attributes = TRUE + # ) + # expect_true(all( + # !c("queryTime", "siteInfo") %in% + # names(attributes(rawPcode2)) + # )) + # + # # This means wqp_check_status wasn't called: + # expect_false("dataProviders" %in% names(attr(rawPcode2, "headerInfo"))) + # + # pHData <- readWQPdata( + # siteid = "USGS-04024315", + # characteristicName = "pH", + # service = "ResultWQX3" + # ) + # expect_true(all( + # c("url", "queryTime", "siteInfo", "headerInfo") %in% + # names(attributes(pHData)) + # )) + # + # # # This means wqp_check_status was called: + # expect_true("dataProviders" %in% names(attr(pHData, "headerInfo"))) + # + # pHData2 <- readWQPdata( + # siteid = "USGS-04024315", + # characteristicName = "pH", + # ignore_attributes = TRUE, + # service = "ResultWQX3" + # ) + # expect_true(all( + # !c("queryTime", "siteInfo") %in% + # names(attributes(pHData2)) + # )) + # + # # # This means wqp_check_status was called: + # expect_false("dataProviders" %in% names(attr(pHData2, "headerInfo"))) + # + # rawPcode <- readWQPqw( + # "USGS-01594440", + # "01075", + # ignore_attributes = TRUE, + # legacy = FALSE + # ) + # headerInfo <- attr(rawPcode, "headerInfo") + # wqp_request_id <- headerInfo$`wqp-request-id` + # count_info 
<- wqp_check_status(wqp_request_id) + # + # expect_true("dataProviders" %in% names(count_info)) }) diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index befb1d3ab..68e24645f 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -233,40 +233,40 @@ test_that("read_waterdata_daily", { test_that("WQP qw tests", { testthat::skip_on_cran() skip_on_ci() - nameToUse <- "Specific conductance" - pcodeToUse <- "00095" - - INFO_WQP <- readWQPqw( - "USGS-04024315", - pcodeToUse, - startDate = "", - endDate = "", - legacy = FALSE - ) - expect_is(INFO_WQP$Activity_StartDateTime, "POSIXct") - - INFO2 <- readWQPqw( - "WIDNR_WQX-10032762", - nameToUse, - startDate = "", - endDate = "", - legacy = FALSE - ) - expect_is(INFO2$Activity_StartDateTime, "POSIXct") - - df <- readWQPqw("USGS-04193500", parameterCd = "00665", legacy = FALSE) - expect_true(nrow(df) > 0) - - df2 <- readWQPqw("USGS-05427718", parameterCd = "all") - expect_true(nrow(df2) > 0) - - #Empty legacy: - df3 <- readWQPqw( - siteNumbers = "USGS-385032115220501", - parameterCd = "all", - legacy = TRUE - ) - expect_true(nrow(df3) == 0) + # nameToUse <- "Specific conductance" + # pcodeToUse <- "00095" + # + # INFO_WQP <- readWQPqw( + # "USGS-04024315", + # pcodeToUse, + # startDate = "", + # endDate = "", + # legacy = FALSE + # ) + # expect_is(INFO_WQP$Activity_StartDateTime, "POSIXct") + # + # INFO2 <- readWQPqw( + # "WIDNR_WQX-10032762", + # nameToUse, + # startDate = "", + # endDate = "", + # legacy = FALSE + # ) + # expect_is(INFO2$Activity_StartDateTime, "POSIXct") + # + # df <- readWQPqw("USGS-04193500", parameterCd = "00665", legacy = FALSE) + # expect_true(nrow(df) > 0) + # + # df2 <- readWQPqw("USGS-05427718", parameterCd = "all") + # expect_true(nrow(df2) > 0) + # + # #Empty legacy: + # df3 <- readWQPqw( + # siteNumbers = "USGS-385032115220501", + # parameterCd = "all", + # legacy = TRUE + # ) + # 
expect_true(nrow(df3) == 0) }) diff --git a/vignettes/join_by_closest.Rmd b/vignettes/join_by_closest.Rmd index 8341c4df8..f8018466c 100644 --- a/vignettes/join_by_closest.Rmd +++ b/vignettes/join_by_closest.Rmd @@ -46,9 +46,13 @@ pcode_uv <- "99133" pcode_samples <- "00631" start_date <- as.Date("2018-01-01") end_date <- as.Date("2020-01-01") -samples_data <- readWQPqw(site_samples, pcode_samples, - startDate = start_date, - endDate = end_date) + +samples_data <- read_waterdata_samples( + monitoringLocationIdentifier = site_samples, + usgsPCode = pcode_samples, + activityStartDateUpper = end_date, + activityStartDateLower = start_date, + dataProfile = "fullphyschem") uv_data <- read_waterdata_continuous(monitoring_location_id = site_uv, parameter_code = c(pcode_uv), @@ -60,11 +64,11 @@ Next we'll clean up the discrete water quality data to make it easy to follow in ```{r trimsamplesdata} samples_trim <- samples_data |> - filter(ActivityTypeCode == "Sample-Routine", - !is.na(ActivityStartDateTime)) |> - select(samples_date = ActivityStartDateTime, - val_samples = ResultMeasureValue, - det_txt = ResultDetectionConditionText) + filter(Activity_TypeCode == "Sample-Routine", + !is.na(Activity_StartDateTime)) |> + select(samples_date = Activity_StartDateTime, + val_samples = Result_Measure, + det_txt = Result_ResultDetectionCondition) ``` ```{r showsamplestrim, echo=FALSE} From 66fe712effcca9ff03ceeeff3fc6061cff741321 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Fri, 1 May 2026 11:39:52 -0500 Subject: [PATCH 11/29] Peaks in vignettes --- R/readNWISunit.R | 21 ++++++++++-------- R/read_waterdata_peaks.R | 2 +- man/readNWISpeak.Rd | 10 ++++----- man/readNWISrating.Rd | 4 ++-- man/read_waterdata_peaks.Rd | 2 +- tests/testthat/tests_userFriendly_fxns.R | 8 +++---- vignettes/Status.Rmd | 10 +++++---- vignettes/dataRetrieval.Rmd | 28 +++++++++++++----------- vignettes/tutorial.Rmd | 4 ++-- 9 files changed, 48 insertions(+), 41 deletions(-) diff --git a/R/readNWISunit.R 
b/R/readNWISunit.R index 1a28cef36..fd6f5c88c 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -156,12 +156,12 @@ readNWISuv <- function( #' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() -#' site_ids <- c("01594440", "040851325") +#' #site_ids <- c("01594440", "040851325") #' \donttest{ -#' data <- readNWISpeak(site_ids) -#' data2 <- readNWISpeak(site_ids, asDateTime = FALSE) -#' stations <- c("06011000") -#' peakdata <- readNWISpeak(stations, convertType = FALSE) +#' #data <- readNWISpeak(site_ids) +#' #data2 <- readNWISpeak(site_ids, asDateTime = FALSE) +#' #stations <- c("06011000") +#' #peakdata <- readNWISpeak(stations, convertType = FALSE) #' } readNWISpeak <- function( siteNumbers, @@ -170,8 +170,11 @@ readNWISpeak <- function( asDateTime = TRUE, convertType = TRUE ) { - message(new_nwis_message()) - + .Deprecated( + new = "read_waterdata_peaks", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_peaks." 
+ ) # Doesn't seem to be a peak xml service url <- constructNWISURL( siteNumbers = siteNumbers, @@ -261,8 +264,8 @@ readNWISpeak <- function( #' @examplesIf is_dataRetrieval_user() #' site_id <- "01594440" #' \donttest{ -#' data <- readNWISrating(site_id, "base") -#' attr(data, "RATING") +#' #data <- readNWISrating(site_id, "base") +#' #attr(data, "RATING") #' } readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { .Deprecated( diff --git a/R/read_waterdata_peaks.R b/R/read_waterdata_peaks.R index 7ea265d8a..3837a4c9a 100644 --- a/R/read_waterdata_peaks.R +++ b/R/read_waterdata_peaks.R @@ -42,7 +42,7 @@ #' #' #' dv_data_sf <- read_waterdata_peaks( -#' monitoring_location_id = site, +#' monitoring_location_id = wi_peaks$monitoring_location_id[1], #' parameter_code = "00060") #' #' } diff --git a/man/readNWISpeak.Rd b/man/readNWISpeak.Rd index f36a4007b..c07c8a96a 100644 --- a/man/readNWISpeak.Rd +++ b/man/readNWISpeak.Rd @@ -77,12 +77,12 @@ R Date objects. } \examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} -site_ids <- c("01594440", "040851325") +#site_ids <- c("01594440", "040851325") \donttest{ -data <- readNWISpeak(site_ids) -data2 <- readNWISpeak(site_ids, asDateTime = FALSE) -stations <- c("06011000") -peakdata <- readNWISpeak(stations, convertType = FALSE) +#data <- readNWISpeak(site_ids) +#data2 <- readNWISpeak(site_ids, asDateTime = FALSE) +#stations <- c("06011000") +#peakdata <- readNWISpeak(stations, convertType = FALSE) } \dontshow{\}) # examplesIf} } diff --git a/man/readNWISrating.Rd b/man/readNWISrating.Rd index 0aaafcb90..cf520780a 100644 --- a/man/readNWISrating.Rd +++ b/man/readNWISrating.Rd @@ -48,8 +48,8 @@ relate flow to stage. 
\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} site_id <- "01594440" \donttest{ -data <- readNWISrating(site_id, "base") -attr(data, "RATING") +#data <- readNWISrating(site_id, "base") +#attr(data, "RATING") } \dontshow{\}) # examplesIf} } diff --git a/man/read_waterdata_peaks.Rd b/man/read_waterdata_peaks.Rd index 5a3c77e53..b809a24c6 100644 --- a/man/read_waterdata_peaks.Rd +++ b/man/read_waterdata_peaks.Rd @@ -160,7 +160,7 @@ wi_peaks <- read_waterdata_combined_meta( dv_data_sf <- read_waterdata_peaks( - monitoring_location_id = site, + monitoring_location_id = wi_peaks$monitoring_location_id[1], parameter_code = "00060") } diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 901fec4ea..393c13b4b 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -113,11 +113,11 @@ context("Peak, rating, meas, site") test_that("peak, rating curves, surface-water measurements", { testthat::skip_on_cran() testthat::skip_on_ci() - siteNumbers <- c("01594440", "040851325") - data <- readNWISpeak(siteNumbers) - expect_is(data$agency_cd, "character") + siteNumbers <- c("USGS-01594440", "USGS-040851325") + data <- read_waterdata_peaks(monitoring_location_id = siteNumbers) + expect_true(ncol(data) > 10) - # Rating curvs: + # Rating curves: siteNumber <- "USGS-01594440" data <- read_waterdata_ratings( monitoring_location_id = siteNumber, diff --git a/vignettes/Status.Rmd b/vignettes/Status.Rmd index f0fcd3a21..f6aab2006 100644 --- a/vignettes/Status.Rmd +++ b/vignettes/Status.Rmd @@ -60,18 +60,20 @@ df <- data.frame( "`read_waterdata_field_measurements`, `read_waterdata_channel`", "`read_waterdata`", "`read_waterdata_continuous`", - "", #rating + "`read_waterdata_ratings`", #rating "`read_waterdata_stats_por`, `read_waterdata_stats_daterange`", - rep("", 2), + "`read_waterdata_peaks`", + "", "`read_waterdata_ts_meta`, `read_waterdata_field_meta`, 
`read_waterdata_combined_meta`" ), "Available on (branch)" = c(rep("main (CRAN)", 6), "main (CRAN)", "main (CRAN)", "main (CRAN)", - "", + "develop", "main (CRAN)", - rep("", 2), + "develop", + "", "main (CRAN)") ) diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd index a89c25566..d5b3c69ca 100644 --- a/vignettes/dataRetrieval.Rmd +++ b/vignettes/dataRetrieval.Rmd @@ -71,9 +71,9 @@ Functions <- c( "read_waterdata", "read_waterdata_daily", "read_waterdata_continuous", - "readNWISrating", + "read_waterdata_ratings", "read_waterdata_field_measurements", - "readNWISpeak", + "read_waterdata_peaks", "read_waterdata_field_measurements", "readNWISuse", "read_waterdata_stats_por, read_waterdata_stats_daterange", @@ -81,7 +81,7 @@ Functions <- c( "read_waterdata_monitoring_location", "read_waterdata_samples", "summarize_waterdata_samples", - "whatNWISsites", + "read_waterdata_combined_meta", "read_waterdata_ts_meta", "readWQPdata", "readWQPqw", @@ -119,16 +119,17 @@ Description <- c( Source <- c("USGS Water Data API", "USGS Water Data API", "USGS Water Data API", - "NWIS", + "USGS Water Data API", + "USGS Water Data API", + "USGS Water Data API", "USGS Water Data API", "NWIS", "USGS Water Data API", - rep("NWIS",2), "USGS Water Data API", "USGS Water Data API", "USGS Samples Data", "USGS Samples Data", - "NWIS", + "USGS Water Data API", "USGS Water Data API", rep("WQP", 7)) @@ -139,7 +140,7 @@ data.df <- data.frame( Source, stringsAsFactors = FALSE ) -kable(data.df, +kable(data.df, col.names = c("Name", "Data Returned", "Source"), caption = "Table 1: dataRetrieval functions" ) ``` @@ -381,11 +382,11 @@ groundWater <- read_waterdata_field_measurements(monitoring_location_id = siteNu ## Peak Flow Data -Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. 
Peak discharge measurements can be obtained with the `readNWISpeak` function. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. +Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the `read_waterdata_peaks` function. ```{r peakexample, echo=TRUE, eval=FALSE} -siteNumber <- "01594440" -peakData <- readNWISpeak(siteNumber) +siteNumber <- "USGS-01594440" +peakData <- read_waterdata_peaks(monitoring_location_id = siteNumber) ``` @@ -393,11 +394,12 @@ peakData <- readNWISpeak(siteNumber) Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. -Rating curves can be obtained with the `readNWISrating` function. +Rating curves can be obtained with the `read_waterdata_ratings` function. ```{r ratingexample, echo=TRUE, eval=FALSE} -ratingData <- readNWISrating(siteNumber, "base") -attr(ratingData, "RATING") +ratingData <- read_waterdata_ratings(monitoring_location_id = siteNumber, + file_type = "base") +comment(ratingData[[1]]) ``` diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 19651ee65..e97fa1cf5 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -112,8 +112,8 @@ There are many types of data available from `dataRetrieval`. 
To understand how t |site|`readNWISsite`|Site metadata| `read_waterdata_monitoring_location` | |pcode|`readNWISpCode`|Parameter code metadata | `read_waterdata_parameter_codes` | |stat|`readNWISstat`| Site statistics | `read_waterdata_stats_por`, `read_waterdata_stats_daterange` | -|rating|`readNWISrating`| Rating curves| None yet | -|peak|`readNWISpeak`|Peak flow| None yet | +|rating|`readNWISrating`| Rating curves| `read_waterdata_ratings` | +|peak|`readNWISpeak`|Peak flow| `read_waterdata_peaks` | |use|`readNWISuse`|Water Use| None yet | |meas|`readNWISmeas`|Discrete surface water| `read_waterdata_field_measurements`| | | `readNWISdata` | General data import | `read_waterdata` | From 00b2d08abe605b32659a628c8baa35f233b4b3d5 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Fri, 1 May 2026 11:52:45 -0500 Subject: [PATCH 12/29] Add to NEWS --- NEWS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 2a00027a4..1ba12dba6 100644 --- a/NEWS +++ b/NEWS @@ -2,8 +2,10 @@ dataRetrieval 2.7.25 =================== * Added read_waterdata_ratings to access USGS rating curves with new modern endpoint. +* Added read_waterdata_peaks to access USGS peak data with +new modern endpoint. * Increase flexibility of chunking by monitoring_location_id by -including it as an argument in each relevant function. +including it as an argument in each relevant waterdata OGC function. * Clean up deprecated code. * Updated retry strategy to include retry_on_failure = TRUE. 
* Added countries, methods, method-categories, method-citations, and From 04b555a2e10693fdb17d1452927552be71faac7b Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Fri, 1 May 2026 13:56:50 -0500 Subject: [PATCH 13/29] cleaned up doc code --- R/read_waterdata_samples.R | 2 +- man/construct_waterdata_sample_request.Rd | 2 +- man/read_waterdata_samples.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/read_waterdata_samples.R b/R/read_waterdata_samples.R index f6d45b0f3..f62bbed6a 100644 --- a/R/read_waterdata_samples.R +++ b/R/read_waterdata_samples.R @@ -66,7 +66,7 @@ #' @param recordIdentifierUserSupplied Record identifier, user supplied identifier. This #' information would be needed from the data supplier. #' @param siteTypeName Site type name query parameter. See available -#' options by running `check_param("sitetype")$typeName`. +#' options by running `check_waterdata_sample_params("sitetype")$typeName`. #' @param usgsPCode USGS parameter code. See available options by running #' `check_waterdata_sample_params("characteristics")$parameterCode`. #' @param pointLocationLatitude Latitude for a point/radius query (decimal degrees). Must be used diff --git a/man/construct_waterdata_sample_request.Rd b/man/construct_waterdata_sample_request.Rd index e6489e925..0ab3f968b 100644 --- a/man/construct_waterdata_sample_request.Rd +++ b/man/construct_waterdata_sample_request.Rd @@ -100,7 +100,7 @@ would be needed from prior project information.} information would be needed from the data supplier.} \item{siteTypeName}{Site type name query parameter. See available -options by running \code{check_param("sitetype")$typeName}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeName}.} \item{usgsPCode}{USGS parameter code. 
See available options by running \code{check_waterdata_sample_params("characteristics")$parameterCode}.} diff --git a/man/read_waterdata_samples.Rd b/man/read_waterdata_samples.Rd index 6ca606468..224ca4fc8 100644 --- a/man/read_waterdata_samples.Rd +++ b/man/read_waterdata_samples.Rd @@ -102,7 +102,7 @@ would be needed from prior project information.} information would be needed from the data supplier.} \item{siteTypeName}{Site type name query parameter. See available -options by running \code{check_param("sitetype")$typeName}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeName}.} \item{usgsPCode}{USGS parameter code. See available options by running \code{check_waterdata_sample_params("characteristics")$parameterCode}.} From b600173f397dc4d3d706378df238c73799c93672 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Mon, 4 May 2026 12:19:18 -0500 Subject: [PATCH 14/29] Adding an ERROR to uses where a time argument is input like this: time = c(NA, as.Date(Sys.Date())) Because, this turns into: c(NA, as.Date(Sys.Date())) [1] NA 20577 And we don't know if those are days or seconds... --- DESCRIPTION | 2 +- NEWS | 3 + R/construct_api_requests.R | 43 +++- man/read_waterdata_channel.Rd | 3 +- man/read_waterdata_combined_meta.Rd | 2 +- tutorials/basic_slides_deck.qmd | 386 ++++++++++++++++++---------- tutorials/quick_intro_deck.qmd | 14 +- vignettes/Contributing.Rmd | 18 +- 8 files changed, 313 insertions(+), 158 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6890b9308..e7ad2af72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,5 +68,5 @@ Encoding: UTF-8 BuildVignettes: true VignetteBuilder: knitr BugReports: https://github.com/DOI-USGS/dataRetrieval/issues -RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) +Config/roxygen2/version: 8.0.0 diff --git a/NEWS b/NEWS index 1ba12dba6..3ee8bd776 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,9 @@ including it as an argument in each relevant waterdata OGC function. 
citations to possible values in read_waterdata_metadata. * Added field_measurements_series_id argument to read_waterdata_field_measurement * Removed NWIS tests +* Introduce an error if user inputs a numeric to any of the "time" arguments. +Because it is impossible to tell if they intended Dates or POSIX, we cannot +know for sure and therefore could add incorrect filters to the query. diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index 4fe15147c..c0469f64f 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -340,7 +340,7 @@ switch_arg_id <- function(ls, id_name, service) { #' dataRetrieval:::format_api_dates(end) #' dataRetrieval:::format_api_dates(end, TRUE) #' -#' end <- c(NA, as.POSIXct("2021-01-01 12:15:00")) +#' end <- as.POSIXct(c(NA, "2021-01-01 12:15:00")) #' dataRetrieval:::format_api_dates(end) #' #' start_end <- as.POSIXct(c("2021-01-01 12:15:00", @@ -370,17 +370,24 @@ switch_arg_id <- function(ls, id_name, service) { #' start <- "2025-10-01" #' end <- Sys.Date() #' dataRetrieval:::format_api_dates(c(start, end), date = TRUE) +#' +#' # This is a problem because the first value forces the +#' # vector to be numeric, and then we don't really +#' # know if the 2nd value is a Date (number of days since 1970) +#' # or if it's a date/time (number of seconds..) 
+#' half_range <- c(NA, as.Date("2025-01-01")) +#' # Will error: +#' #dataRetrieval:::format_api_dates(half_range, date = FALSE) +#' # Better way to do it: +#' better_half <- as.Date(c(NA, "2025-01-01")) +#' dataRetrieval:::format_api_dates(better_half, date = FALSE) format_api_dates <- function(datetime, date = FALSE) { if (is.character(datetime)) { datetime[datetime == ""] <- NA datetime <- toupper(datetime) } - if (all(is.na(datetime))) { - return(NA) - } - - if (all(is.null(datetime))) { + if (all(is.na(datetime)) | all(is.null(datetime))) { return(NA) } @@ -388,6 +395,18 @@ format_api_dates <- function(datetime, date = FALSE) { stop("datetime should only include 1-2 values") } + if (is.numeric(datetime)) { + # Until we can figure out a way to know if the + # original input was supposed to be Date or POSIXct, + # we can't determine what the user meant. + stop( + "A time query was entered as numeric. This could lead to errors. +Check any time queries that might have been automatically converted to numeric. +This could happen if you use c(NA, as.Date(Sys.Date())) instead of +as.Date(c(NA, Sys.Date())) for example." 
+ ) + } + if (length(datetime) == 1) { # If the user has "P" or the "/" we assume they know what they are doing if ( @@ -395,13 +414,13 @@ format_api_dates <- function(datetime, date = FALSE) { grepl("/", datetime) ) { return(datetime) + } + + if (date) { + datetime <- get_Date(datetime) } else { - if (date) { - datetime <- get_Date(datetime) - } else { - datetime1 <- get_dateTime(datetime) - datetime <- lubridate::format_ISO8601(datetime1, usetz = "Z") - } + datetime1 <- get_dateTime(datetime) + datetime <- lubridate::format_ISO8601(datetime1, usetz = "Z") } } else if (length(datetime) == 2) { if (date) { diff --git a/man/read_waterdata_channel.Rd b/man/read_waterdata_channel.Rd index 024cd908d..e9bee3b98 100644 --- a/man/read_waterdata_channel.Rd +++ b/man/read_waterdata_channel.Rd @@ -111,8 +111,7 @@ Examples: Only features that have a \code{last_modified} that intersects the value of datetime are selected. -\if{html}{\out{
}}\preformatted{ field_visit_id = NA_character_, -}\if{html}{\out{
}}} +field_visit_id = NA_character_,} \item{channel_measurement_type}{The channel measurement type.} diff --git a/man/read_waterdata_combined_meta.Rd b/man/read_waterdata_combined_meta.Rd index f7d22c0d1..efab47c84 100644 --- a/man/read_waterdata_combined_meta.Rd +++ b/man/read_waterdata_combined_meta.Rd @@ -237,7 +237,7 @@ See also Details below for more information.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, field_measurement_id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, data_type, computation_identifier, thresholds, sublocatio +geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, 
horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, field_measurement_id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, data_type, computation_identifier, thresholds, sublocation_identifier, primary, web_description, parameter_description, parent_time_series_id. The default (\code{NA}) will return all columns of the data.} \item{skipGeometry}{This parameter can be used to skip response geometries for diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 180967958..ecce42e78 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -15,12 +15,15 @@ title-slide-attributes: data-background-size: 15% data-background-position: 2% 2% editor: source +engine: knitr editor_options: chunk_output_type: console execute: echo: true warning: false message: false +params: + run_python: true --- ```{r} @@ -29,30 +32,46 @@ execute: # library(dataRetrieval) library(ggplot2) library(dplyr) +library(reticulate) +py_require("dataretrieval") +py_require("pandas") +py_require("matplotlib") options(dplyr.summarise.inform = FALSE) -dt_me <- function(x, - page_length = 8, - paging = TRUE, - font = "0.7em", - escape = TRUE){ - DT::datatable(x, - rownames = FALSE, - options = list(pageLength = page_length, - info = FALSE, - searching = FALSE, - paging = paging, - lengthChange = FALSE, - initComplete = htmlwidgets::JS( - "function(settings, json) {", - paste0("$(this.api().table().container()).css({'font-size': '", - font, "'});"), - "}")), escape = escape) +evaluate_python <- params$run_python + +dt_me <- function( + x, + page_length = 8, + paging = TRUE, + font = "0.7em", + escape = TRUE +) { + DT::datatable( + x, + rownames 
= FALSE, + options = list( + pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0( + "$(this.api().table().container()).css({'font-size': '", + font, + "'});" + ), + "}" + ) + ), + escape = escape + ) } theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) - +update_geom_defaults("point", list(size = 3)) ``` @@ -151,13 +170,16 @@ Go to Tools -> Global Options -> Appearances to change style. ## Installation +::: {.panel-tabset} + +### R + `dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: ```{r} #| echo: true #| eval: false install.packages("dataRetrieval") - ``` Then each time you open R, you'll need to load the library: @@ -167,6 +189,33 @@ Then each time you open R, you'll need to load the library: library(dataRetrieval) ``` +### Python + +Whether you are a user or developer we recommend installing `dataretrieval` in a virtual environment. This can be done using something like virtualenv or conda. 
+ +```{bash} +#| echo: true +#| eval: false +pip install dataretrieval +``` + +or + +```{bash} +#| echo: true +#| eval: false +conda install -c conda-forge dataretrieval +``` + +Then each time you open Python, you'll need to load the library: + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata +``` + +::: + ::: footer ::: @@ -194,14 +243,29 @@ library(dataRetrieval) ## Documentation within R: function help pages {.smaller} +::: {.panel-tabset} + +### R + Within R, you can call help files for any `dataRetrieval` function: ```{r} #| echo: true #| eval: false -?readWQPdata +?read_waterdata_daily ``` +### Python + +Within Python, you can call help for any `dataRetrieval` function: + +```{python} +#| eval: FALSE +help(waterdata.get_daily) +``` + +::: + :::: {.columns} ::: {.column width="50%"} @@ -219,18 +283,34 @@ Scroll down to the "Examples" to see how each function can be run. Examples +:::: {.panel-tabset} + +### R + ```{r} #| eval: false -# Legacy: -nameToUse <- "pH" -pHData <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse) -ncol(pHData) -attr(pHData, "siteInfo") -attr(pHData, "queryTime") -attr(pHData, "url") +site <- "USGS-02238500" +dv_data_sf <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = "00060", + time = c("2021-01-01", "2022-01-01") +) +``` + +### Python + +```{python} +#| eval: false +df, md = waterdata.get_daily( + + monitoring_location_id="USGS-02238500", + parameter_code="00060", + time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", +) ``` +:::: + ::: :::: @@ -459,11 +539,12 @@ pcode <- "00060" # Discharge stat_cd <- "00003" # Mean range <- c("2024-10-01", NA) -df <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = pcode, - statistic_id = stat_cd, - time = range) - +df <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = pcode, + statistic_id = stat_cd, + time = range +) ``` ::: footer @@ -476,12 +557,11 @@ In RStudio, click 
on the data frame in the upper right Environment tab to open a ```{r} #| echo: false - -dt_me(df |> - sf::st_drop_geometry(), - page_length = 3) - - +dt_me( + df |> + sf::st_drop_geometry(), + page_length = 3 +) ``` ::: footer @@ -498,10 +578,7 @@ Let's use `ggplot2` to visualize the data. library(ggplot2) ggplot(data = df) + - geom_point(aes(x = time, - y = value, - color = approval_status)) - + geom_point(aes(x = time, y = value, color = approval_status)) ``` ## Water Data API Notes: Argument input @@ -521,9 +598,10 @@ Use your "tab" key! ```{r} #| eval: false #| echo: true -discharge <- read_waterdata_daily(parameter_code = "00060", - statistic_id = "00003") - +discharge <- read_waterdata_daily( + parameter_code = "00060", + statistic_id = "00003" +) ``` ::: {.fragment} @@ -569,9 +647,11 @@ Here are a bunch of valid inputs: time = "2025-01-01" time = as.Date("2025-01-01") time = "2025-01-01T23:20:50Z" -time = as.POSIXct("2025-01-01T23:20:50Z", - format = "%Y-%m-%dT%H:%M:%S", - tz = "UTC") +time = as.POSIXct( + "2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC" +) # Ask for specific range time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs # Asking beginning of record to specific end: @@ -603,10 +683,12 @@ Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal site <- "USGS-01631000" pcode <- "00660" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = pcode, - dataType = "results", - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem" +) ncol(qw_data) ``` @@ -626,21 +708,31 @@ That's a LOT of columns returned. 
We won't look at them here, but you can use `V ```{r} #| echo: false - -df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), - Description = c("Results data and metadata for measures and observations matching your query", - "Find monitoring locations that have data matching your query", - "Information about the monitoring activities conducted that produced data", - "Information on the projects that have results matching your data query", - "Information about the organizations that have provided data that matches your query"), - dataProfile = c('fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', - 'site
count', - 'sampact
actmetric
actgroup
ncount', - 'project
projectmonitoringlocationweight', - 'organization
count')) +df <- tibble( + dataType = c( + "results", + "locations", + "activities", + "projects", + "organizations" + ), + Description = c( + "Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query" + ), + dataProfile = c( + 'fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', + 'site
count', + 'sampact
actmetric
actgroup
ncount', + 'project
projectmonitoringlocationweight', + 'organization
count' + ) +) dt_me(df, escape = FALSE, paging = FALSE) - ``` ::: footer @@ -654,16 +746,19 @@ Let's pull a few columns out and look at those: ```{r} library(dplyr) -qw_data_slim <- qw_data |> - select(Date = Activity_StartDate, - Result_Measure, - DL_cond = Result_ResultDetectionCondition, - DL_val = DetectionLimit_MeasureA, - DL_type = DetectionLimit_TypeA) |> - mutate(Result = if_else(!is.na(DL_cond), DL_val, Result_Measure), - Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected")) |> +qw_data_slim <- qw_data |> + select( + Date = Activity_StartDate, + Result_Measure, + DL_cond = Result_ResultDetectionCondition, + DL_val = DetectionLimit_MeasureA, + DL_type = DetectionLimit_TypeA + ) |> + mutate( + Result = if_else(!is.na(DL_cond), DL_val, Result_Measure), + Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected") + ) |> arrange(Detected) - ``` * What is `|>`? It's a pipe! It says take 'this thing' and put it in 'that thing'. You'll also see `%>%` in code, it is also a pipe - they are basically the same. 
@@ -676,8 +771,7 @@ qw_data_slim <- qw_data |> ```{r} #| echo: false - -dt_me(qw_data_slim, page_length = 8, font = "0.7em") +dt_me(qw_data_slim, page_length = 8, font = "0.7em") ``` ::: footer @@ -706,16 +800,20 @@ p_code_qw <- "00665" start_date <- "2015-07-03" end_date <- "2025-07-03" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = p_code_qw, - activityStartDateLower = start_date, - activityStartDateUpper = end_date, - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = p_code_qw, + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem" +) -dv_data <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = p_code_dv, - statistic_id = stat_cd, - time = c(start_date, end_date)) +dv_data <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = p_code_dv, + statistic_id = stat_cd, + time = c(start_date, end_date) +) ``` ## Step 2: Join @@ -726,9 +824,8 @@ library(dplyr) little_dv <- dv_data |> select(time, Flow = value, monitoring_location_id) -qw_data_joined <- qw_data |> - left_join(little_dv, - by = c("Activity_StartDate" = "time")) +qw_data_joined <- qw_data |> + left_join(little_dv, by = c("Activity_StartDate" = "time")) ``` * "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type (in this case, both are Date objects). @@ -744,11 +841,14 @@ qw_data_joined <- qw_data |> ```{r} #| eval: false -qw_data <- qw_data |> - left_join(little_dv, - by = c("Activity_StartDate" = "time", - "Location_Identifier" = "monitoring_location_id")) - +qw_data <- qw_data |> + left_join( + little_dv, + by = c( + "Activity_StartDate" = "time", + "Location_Identifier" = "monitoring_location_id" + ) + ) ``` See `dplyr` documentation for lots of joining options, but I find `left_join` my "go-to" for straightforward joins. 
@@ -764,9 +864,7 @@ Let's take a quick peak: ```{r} #| output-location: column ggplot(data = qw_data_joined) + - geom_point(aes(x = Flow, - y = Result_Measure)) - + geom_point(aes(x = Flow, y = Result_Measure)) ``` @@ -794,12 +892,11 @@ band_instruments <- band_instruments ```{r} -band_members |> +band_members |> left_join(band_instruments, by = "name") -band_instruments |> +band_instruments |> left_join(band_members, by = "name") - ``` @@ -829,10 +926,12 @@ p_code_rt <- "99133" start_date <- "2024-01-01" end_date <- "2024-06-01" -continuous_data <- read_waterdata_continuous(monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = c(start_date, end_date)) - +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) + names(continuous_data) ``` @@ -859,8 +958,7 @@ https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lan ```{r} #| output-location: column ggplot(data = continuous_data) + - geom_point(aes(x = time, - y = value)) + geom_point(aes(x = time, y = value)) ``` ## Workflow 5: Join Discrete and Continuous @@ -869,12 +967,13 @@ That same site also measures discrete Nitrate plus Nitrite, which is parameter c ```{r} #| message: true -discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-11455508", - usgsPCode = "00631", - activityStartDateLower = start_date, - activityStartDateUpper = end_date, - dataProfile = "basicphyschem") - +discrete_data <- read_waterdata_samples( + monitoringLocationIdentifier = "USGS-11455508", + usgsPCode = "00631", + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem" +) ``` ## Workflow 5: Join Discrete and Continuous @@ -895,8 +994,12 @@ discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-114 library(data.table) setDT(discrete_data)[, join_date := Activity_StartDateTime] setDT(continuous_data)[, join_date := 
time] - -closest_dt <- continuous_data[discrete_data, on = .(join_date), roll = "nearest"] + +closest_dt <- continuous_data[ + discrete_data, + on = .(join_date), + roll = "nearest" +] closest_dt <- data.frame(closest_dt) ``` @@ -909,13 +1012,11 @@ closest_dt <- data.frame(closest_dt) ```{r} #| output-location: column ggplot(data = closest_dt) + - geom_point(aes(x = Result_Measure, - y = value)) + + geom_point(aes(x = Result_Measure, y = value)) + geom_abline() + expand_limits(x = 0, y = 0) + xlab("Discrete") + ylab("Continuous") - ``` @@ -937,13 +1038,19 @@ ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-04183500") ```{r} #| echo: false - -dt_me(ts_available |> - sf::st_drop_geometry() |> - select(parameter_name, - parameter_code, statistic_id, begin, end, - computation_identifier), page_length = 6) - +dt_me( + ts_available |> + sf::st_drop_geometry() |> + select( + parameter_name, + parameter_code, + statistic_id, + begin, + end, + computation_identifier + ), + page_length = 6 +) ``` ::: footer @@ -953,19 +1060,24 @@ dt_me(ts_available |> ## Data Discovery: Discrete {.smaller} ```{r} -discrete_available <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") - +discrete_available <- summarize_waterdata_samples( + monitoringLocationIdentifier = "USGS-04183500" +) ``` ```{r} #| echo: false - -dt_me(discrete_available |> - select(characteristicUserSupplied, - resultCount, activityCount, - firstActivity, mostRecentActivity), - page_length = 6) - +dt_me( + discrete_available |> + select( + characteristicUserSupplied, + resultCount, + activityCount, + firstActivity, + mostRecentActivity + ), + page_length = 6 +) ``` ::: footer @@ -977,8 +1089,10 @@ dt_me(discrete_available |> * characteristicUserSupplied can be an input to `read_waterdata_sample` ```{r} -discrete1 <- read_waterdata_samples(characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", - monitoringLocationIdentifier = "USGS-04183500") +discrete1 
<- read_waterdata_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) nrow(discrete1) ``` diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 125507745..11bb4380a 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -33,6 +33,7 @@ params: library(ggplot2) library(dplyr) library(reticulate) + py_require("dataretrieval") py_require("panda") py_require("matplotlib") @@ -129,12 +130,12 @@ In this ~45 minute introduction, the goal is: ## Installation -`dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: - ::: {.panel-tabset} ### R +`dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: + ```{r} #| echo: true #| eval: false @@ -150,11 +151,18 @@ library(dataRetrieval) ### Python +Whether you are a user or developer we recommend installing `dataretrieval` in a virtual environment. This can be done using something like virtualenv or conda. 
+ ```{bash} #| echo: true #| eval: false pip install dataretrieval +``` +```{bash} +#| echo: true +#| eval: false +conda install -c conda-forge dataretrieval ``` Then each time you open Python, you'll need to load the library: @@ -210,7 +218,7 @@ Within R, you can call help files for any `dataRetrieval` function: Within Python, you can call help for any `dataRetrieval` function: ```{python} -#| eval: !expr evaluate_python +#| eval: FALSE help(waterdata.get_daily) ``` diff --git a/vignettes/Contributing.Rmd b/vignettes/Contributing.Rmd index 97e4be399..823be0d58 100644 --- a/vignettes/Contributing.Rmd +++ b/vignettes/Contributing.Rmd @@ -15,9 +15,7 @@ editor_options: ```{r setup, include=FALSE} library(knitr) -opts_chunk$set(echo = TRUE, - warning = FALSE, - message = FALSE) +opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ``` @@ -388,6 +386,20 @@ git pull codeusgs main In general, create a pull request to push to the DOI-USGS/dataRetrieval main branch or a merge request to push to the main branch of code.usgs.gov/water/dataRetrieval. +# Setting up Quarto Slides + +Slide decks are produced using Quarto in the "tutorials" folder. They require both R and Python to be installed. To install, download and install Miniforge. On Windows, open the "Miniforge Prompt"; on macOS and Linux, open a terminal. Navigate to the dataRetrieval directory and type the following: + +``` +mamba env create -f environment.yml +``` + +Then activate the environment (still in Miniforge) using `conda`: +``` +conda activate pyclass +``` +The slides will use the R package `reticulate` to manage flipping back and forth between R and Python. To help `reticulate` know where Python is installed, you will need to add the environment variable "RETICULATE_PYTHON" to your .Renviron file. Run `usethis::edit_r_environ()`, then add the path to your Python installation, and restart R. RStudio can render both the R and Python in the Quarto slides. 
However, if you want to do troubleshooting on individual code chunks, you might want to switch to Positron which allows seamless transition between R and Python consoles. + # References From e3402e8a36f2ba9cb96f12621697d9e902cf00a8 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 5 May 2026 11:41:59 -0500 Subject: [PATCH 15/29] some python updates --- tutorials/basic_slides_deck.qmd | 358 +++++++++++++++++++++++++++----- tutorials/quick_intro_deck.qmd | 75 ++++--- 2 files changed, 341 insertions(+), 92 deletions(-) diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index ecce42e78..71b617051 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -69,9 +69,6 @@ dt_me <- function( escape = escape ) } - -theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) ``` @@ -255,22 +252,11 @@ Within R, you can call help files for any `dataRetrieval` function: ?read_waterdata_daily ``` -### Python - -Within Python, you can call help for any `dataRetrieval` function: - -```{python} -#| eval: FALSE -help(waterdata.get_daily) -``` - -::: - :::: {.columns} ::: {.column width="50%"} -Click here to open a new window: +Click here to open a new window in RStudio: ![](images/help_file_2.png) @@ -283,10 +269,6 @@ Scroll down to the "Examples" to see how each function can be run. 
Examples -:::: {.panel-tabset} - -### R - ```{r} #| eval: false site <- "USGS-02238500" @@ -297,23 +279,21 @@ dv_data_sf <- read_waterdata_daily( ) ``` +::: + +:::: + ### Python -```{python} -#| eval: false -df, md = waterdata.get_daily( +Within Python, you can call help for any `dataretrieval` function: - monitoring_location_id="USGS-02238500", - parameter_code="00060", - time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", -) +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) ``` -:::: - ::: -:::: ::: footer @@ -527,9 +507,13 @@ We're going walk through 3 retrievals: ::: -## Workflow 1: Daily data for known site +## Workflow 1: Daily data for known site {.smaller} + +Let's pull daily mean discharge data for site "USGS-09405500", getting all the data from the last year. -Let's pull daily mean discharge data for site "USGS-0940550", getting all the data from October 10, 2024 onward. +::: {.panel-tabset} + +### R ```{r} #| message: true @@ -537,16 +521,38 @@ library(dataRetrieval) site <- "USGS-09405500" pcode <- "00060" # Discharge stat_cd <- "00003" # Mean -range <- c("2024-10-01", NA) df <- read_waterdata_daily( monitoring_location_id = site, parameter_code = pcode, statistic_id = stat_cd, - time = range + time = "P365D" +) +nrow(df) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata + +site = "USGS-09405500" +pcode = "00060" # Discharge +stat_cd = "00003" # Mean + +df, md = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=pcode, + statistic_id=stat_cd, + time="P365D", ) + +df.shape[0] ``` +::: + ::: footer ::: @@ -570,24 +576,56 @@ dt_me( ## Workflow 1: Plot Daily Data +::: {.panel-tabset} + +### R + Let's use `ggplot2` to visualize the data. 
```{r} #| echo: true #| output-location: column library(ggplot2) +theme_set(theme_bw(base_size = 24)) +update_geom_defaults("point", list(size = 3, color = "steelblue")) +options(ggplot2.discrete.colour = "viridis") ggplot(data = df) + geom_point(aes(x = time, y = value, color = approval_status)) ``` -## Water Data API Notes: Argument input +### Python + +Let's use `matplotlib` to visualize the data. + +```{python} +#| echo: true +#| output-location: column +import matplotlib.pyplot as plt +import pandas as pd + +plt.rcParams["font.size"] = 20 + +levels, categories = pd.factorize(df["approval_status"]) + +fig, ax = plt.subplots() +scatter = ax.scatter(x=df.time, y=df.value, c=levels) +fig.legend(scatter.legend_elements()[0], categories, title="Status") +``` + +::: + +::: footer + +::: + +## Water Data API Notes: Argument input Use your "tab" key! ![](images/autocomplete.png) -## Water Data API Notes: Arguments +## Water Data API Notes: Arguments {.smaller} * When you look at the help file for the new functions, you’ll notice there are lots of possible inputs (arguments). @@ -642,7 +680,7 @@ The "time" argument has a few options: Here are a bunch of valid inputs: ```{r} -#| code-line-numbers: "1-7|8-9|10-13|14-17" +#| code-line-numbers: "1-9|10-11|12-15|16-19" # Ask for exact times: time = "2025-01-01" time = as.Date("2025-01-01") @@ -674,10 +712,14 @@ Use your "tab" key! ![](images/autocomplete_samples.png) -## Workflow 2: Discrete data for known site +## Workflow 2: Discrete data for known site {.smaller} Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal, VA ("USGS-01631000"). +::: {.panel-tabset} + +### R + ```{r} #| message: true site <- "USGS-01631000" @@ -692,6 +734,27 @@ qw_data <- read_waterdata_samples( ncol(qw_data) ``` +R generates a few POSIXct columns to combine date, time, timezone information. 
+ +### Python + +```{python} +#| eval: !expr evaluate_python +site = "USGS-01631000" +pcode = "00660" + +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + service = "results", + profile = "basicphyschem", +) + +qw_data.shape[1] +``` + +::: + That's a LOT of columns returned. We won't look at them here, but you can use `View` in RStudio to explore on your own. ::: footer @@ -739,10 +802,14 @@ dt_me(df, escape = FALSE, paging = FALSE) ::: -## Workflow 2: Discrete data censoring +## Workflow 2: Discrete data censoring {.smaller} Let's pull a few columns out and look at those: +::: {.panel-tabset} + +### R + ```{r} library(dplyr) @@ -763,6 +830,35 @@ qw_data_slim <- qw_data |> * What is `|>`? It's a pipe! It says take 'this thing' and put it in 'that thing'. You'll also see `%>%` in code, it is also a pipe - they are basically the same. +### Python + +```{python} +#| eval: !expr evaluate_python +import numpy as np + +qw_data_slim = ( + qw_data.rename( + columns={ + "Activity_StartDate": "Date", + "Result_ResultDetectionCondition": "DL_cond", + "DetectionLimit_MeasureA": "DL_val", + "DetectionLimit_TypeA": "DL_type", + } + )[["Date", "Result_Measure", "DL_cond", "DL_val", "DL_type"]] + .assign( + Result=lambda x: np.where( + x["DL_cond"].notna(), x["DL_val"], x["Result_Measure"] + ) + ) + .assign( + Detected=lambda x: np.where(x["DL_cond"].notna(), "Not Detected", "Detected") + ) + .sort_values(by="Detected", ascending=False) +) +``` + +::: + ::: footer ::: @@ -790,7 +886,11 @@ dt_me(qw_data_slim, page_length = 8, font = "0.7em") ::: -## Step 1: Get the data +## Step 1: Get the data {.smaller} + +::: {.panel-tabset} + +### R ```{r} site <- "USGS-04183500" @@ -816,11 +916,47 @@ dv_data <- read_waterdata_daily( ) ``` -## Step 2: Join +### Python + +```{python} +#| eval: !expr evaluate_python +site = "USGS-04183500" +p_code_dv = "00060" +stat_cd = "00003" +p_code_qw = "00665" +start_date = "2015-07-03" +end_date = 
"2025-07-03" + +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier=site, + usgsPCode=p_code_qw, + activityStartDateLower=start_date, + activityStartDateUpper=end_date, + profile="basicphyschem", +) + +dv_data, md_dv = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=p_code_dv, + statistic_id=stat_cd, + time=(start_date + "/" + end_date), +) +``` -```{r} -library(dplyr) +::: + +::: footer + +::: + +## Step 2: Join + +::: {.panel-tabset} + +### R + +```{r} little_dv <- dv_data |> select(time, Flow = value, monitoring_location_id) @@ -831,6 +967,25 @@ qw_data_joined <- qw_data |> * "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type (in this case, both are Date objects). +### Python + +```{python} +#| eval: !expr evaluate_python +little_dv = dv_data.rename(columns={"value": "Flow"})[ + ["time", "Flow", "monitoring_location_id"] +] + +qw_data["Activity_StartDate"] = pd.to_datetime( + qw_data["Activity_StartDate"], format="%Y-%m-%d" +) + +qw_data_joined = pd.merge( + qw_data, little_dv, left_on="Activity_StartDate", right_on="time", how="left" +) +``` + +::: + ::: footer ::: @@ -839,6 +994,10 @@ qw_data_joined <- qw_data |> * You could join on multiple columns: +::: {.panel-tabset} + +### R + ```{r} #| eval: false qw_data <- qw_data |> @@ -853,6 +1012,21 @@ qw_data <- qw_data |> See `dplyr` documentation for lots of joining options, but I find `left_join` my "go-to" for straightforward joins. 
+### Python + +```{python} +#| eval: !expr evaluate_python +qw_data = pd.merge( + qw_data, + little_dv, + left_on=["Activity_StartDate", "Location_Identifier"], + right_on=["time", "monitoring_location_id"], + how="left", +) +``` + +::: + ::: footer ::: @@ -861,12 +1035,27 @@ See `dplyr` documentation for lots of joining options, but I find `left_join` my Let's take a quick peak: +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = qw_data_joined) + geom_point(aes(x = Flow, y = Result_Measure)) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=qw_data_joined.Flow, y=qw_data_joined.Result_Measure) +``` + + +::: ## Exercise 2: Joins {.smaller} @@ -913,14 +1102,17 @@ band_instruments |> * We'll look at Suisun Bay a Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. -## Workflow 4: Continuous data for known site +## Workflow 4: Continuous data for known site {.smaller} :::: {.columns} -::: {.column width="70%"} +::: {.column width="65%"} + +::: {.panel-tabset} + +### R ```{r} -#| results: markup site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" @@ -931,40 +1123,83 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) +``` + +``` +Requesting: +https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508&parameter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" -names(continuous_data) +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = date_range +) ``` ::: -::: {.column width="30%"} +::: + +::: {.column 
width="35%"} ``` - [4] "time" "unit_of_measure" "parameter_code" - [7] "statistic_id" "value" "approval_status" -[10] "last_modified" "qualifier" + [1] "monitoring_location_id" + [2] "parameter_code" + [3] "statistic_id" + [4] "time" + [5] "value" + [6] "unit_of_measure" + [7] "approval_status" + [8] "last_modified" + [9] "qualifier" +[10] "time_series_id" ``` ::: :::: -``` -Requesting: -https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508&parameter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z -``` + ## Workflow 4: Inspect +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = continuous_data) + geom_point(aes(x = time, y = value)) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=continuous_data.time, y=continuous_data.value) +``` + +::: + ## Workflow 5: Join Discrete and Continuous That same site also measures discrete Nitrate plus Nitrite, which is parameter code "00631". Let's first grab that data: +::: {.panel-tabset} + +### R + ```{r} #| message: true discrete_data <- read_waterdata_samples( @@ -976,6 +1211,21 @@ discrete_data <- read_waterdata_samples( ) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +discrete_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = "USGS-11455508", + usgsPCode = "00631", + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + profile = "basicphyschem" +) +``` + +::: + ## Workflow 5: Join Discrete and Continuous * We now want to join the **closest** continuous sensor time with the discrete sample time. 
diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 11bb4380a..2eea39ecf 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -71,9 +71,6 @@ dt_me <- function( escape = escape ) } - -theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) ``` @@ -213,22 +210,11 @@ Within R, you can call help files for any `dataRetrieval` function: ?read_waterdata_daily ``` -### Python - -Within Python, you can call help for any `dataRetrieval` function: - -```{python} -#| eval: FALSE -help(waterdata.get_daily) -``` - -::: - :::: {.columns} ::: {.column width="50%"} -Click here to open a new window: +Click here to open a new window in RStudio: ![](images/help_file_2.png) @@ -241,10 +227,6 @@ Scroll down to the "Examples" to see how each function can be run. Examples -::: {.panel-tabset} - -### R - ```{r} #| eval: false site <- "USGS-02238500" @@ -255,23 +237,21 @@ dv_data_sf <- read_waterdata_daily( ) ``` +::: + +:::: + ### Python -```{python} -#| eval: false -df, md = waterdata.get_daily( +Within Python, you can call help for any `dataretrieval` function: - monitoring_location_id="USGS-02238500", - parameter_code="00060", - time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", -) +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) ``` ::: -::: - -:::: ::: footer @@ -533,6 +513,11 @@ Let's use `ggplot2` to visualize the data. #| output-location: column library(ggplot2) +theme_set(theme_bw(base_size = 24)) +update_geom_defaults("point", list(size = 3, color = "steelblue")) +options(ggplot2.discrete.colour = "viridis") +options(ggplot2.discrete.fill = "viridis") + ggplot(data = df) + geom_point(aes(x = time, y = value, color = approval_status)) ``` @@ -547,9 +532,13 @@ Let's use `matplotlib` to visualize the data. 
import matplotlib.pyplot as plt import pandas as pd -df["approval_status"] = pd.Categorical(df["approval_status"]).codes +plt.rcParams["font.size"] = 20 -plt.scatter(x=df.time, y=df.value, c=df.approval_status) +levels, categories = pd.factorize(df["approval_status"]) + +fig, ax = plt.subplots() +scatter = ax.scatter(x=df.time, y=df.value, c=levels) +fig.legend(scatter.legend_elements()[0], categories, title="Status") ``` @@ -616,7 +605,7 @@ The "time" argument has a few options: Here are a bunch of valid inputs: ```{r} -#| code-line-numbers: "1-7|8-9|10-13|14-17" +#| code-line-numbers: "1-9|10-11|12-15|16-19" # Ask for exact times: time = "2025-01-01" time = as.Date("2025-01-01") @@ -766,7 +755,6 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) -nrow(continuous_data) ``` ### Python @@ -778,12 +766,8 @@ p_code_rt = "99133" date_range = "2024-01-01/2024-06-01" continuous_data, md_cont = waterdata.get_continuous( - monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = date_range + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range ) -continuous_data.shape[0] - ``` ::: @@ -815,12 +799,27 @@ https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lan ## Workflow 3: Inspect +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = continuous_data) + geom_point(aes(x = time, y = value)) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=continuous_data.time, y=continuous_data.value) +``` + +::: + ## Data Discovery From 021b11ea99c6832d39a1caca16fffa8f11da2e10 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 5 May 2026 11:55:16 -0500 Subject: [PATCH 16/29] let's always rebuild (for now) --- .gitlab-ci.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d8446a162..b7cd3ecb6 100644 --- a/.gitlab-ci.yml +++ 
b/.gitlab-ci.yml @@ -40,11 +40,6 @@ build-image: services: - name: ${DEVOPS_REGISTRY}usgs/docker:20-dind alias: docker - rules: - - changes: - - .gitlab-ci.yml - - docker/Dockerfile - - environment.yml script: - echo ${CI_REGISTRY_PASSWORD} | docker login -u ${CI_REGISTRY_USER} --password-stdin $CI_REGISTRY - docker pull ${CI_REGISTRY_IMAGE}:latest || true From 8d20e6513ef13b6337511a73884e07c6affe9f11 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 5 May 2026 16:13:46 -0500 Subject: [PATCH 17/29] More cleanup --- tutorials/basic_slides_deck.qmd | 47 +++++++++++++++++++++++++++++---- tutorials/quick_intro_deck.qmd | 7 +++-- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 71b617051..bb5893f37 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -963,9 +963,7 @@ little_dv <- dv_data |> qw_data_joined <- qw_data |> left_join(little_dv, by = c("Activity_StartDate" = "time")) ``` - -* "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type (in this case, both are Date objects). - + ### Python @@ -986,6 +984,8 @@ qw_data_joined = pd.merge( ::: +* "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type (in this case, both are Date objects). + ::: footer ::: @@ -1282,10 +1282,22 @@ The next slides will demo how to use those. 
## Data Discovery: Time Series {.smaller} +::: {.panel-tabset} + +### R ```{r} ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-04183500") ``` +### Python + +```{python} +#| eval: !expr evaluate_python +ts_avail, ts_me = waterdata.get_time_series_metadata(monitoring_location_id="USGS-04183500") +``` + +::: + ```{r} #| echo: false dt_me( @@ -1309,12 +1321,19 @@ dt_me( ## Data Discovery: Discrete {.smaller} +::: {.panel-tabset} + +### R + ```{r} discrete_available <- summarize_waterdata_samples( monitoringLocationIdentifier = "USGS-04183500" ) ``` + +::: + ```{r} #| echo: false dt_me( @@ -1338,6 +1357,10 @@ dt_me( * characteristicUserSupplied can be an input to `read_waterdata_sample` +::: {.panel-tabset} + +### R + ```{r} discrete1 <- read_waterdata_samples( characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", @@ -1346,15 +1369,29 @@ discrete1 <- read_waterdata_samples( nrow(discrete1) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +discrete1, discrete1_me = waterdata.get_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) +discrete1.shape[1] +``` +::: ## More Information {.smaller} -- dataRetrieval repository: +- dataRetrieval R repository: - - [Documentation](https://doi-usgs.github.io/dataRetrieval) - [dataRetrieval New Features](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) - - [General Tutorial](https://rconnect.usgs.gov/NMC_dataRetrieval_1/dataRetrieval_1.html) + +- dataretrieval Python repository: + - + - [Documentation](https://doi-usgs.github.io/dataretrieval-python/) - Contact: - Computational Tools Email: comptools@usgs.gov diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 2eea39ecf..d65d56d96 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -899,11 +899,14 @@ nrow(discrete1) ## More Information {.smaller} -- 
dataRetrieval repository: +- dataRetrieval R repository: - - [Documentation](https://doi-usgs.github.io/dataRetrieval) - [dataRetrieval New Features](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) - - [General Tutorial](https://rconnect.usgs.gov/NMC_dataRetrieval_1/dataRetrieval_1.html) + +- dataretrieval Python repository: + - + - [Documentation](https://doi-usgs.github.io/dataretrieval-python/) - Contact: - Computational Tools Email: comptools@usgs.gov From feae5d10492590184b5cccc6314763a79ff1e98d Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 6 May 2026 07:52:55 -0500 Subject: [PATCH 18/29] save fewer artifacts --- .gitlab-ci.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b7cd3ecb6..00fabdb83 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,6 +46,8 @@ build-image: - docker build -f docker/Dockerfile -t ${CI_REGISTRY_IMAGE}:latest . - docker push --all-tags ${CI_REGISTRY_IMAGE} + artifacts: + when: never buildcheck: stage: check @@ -65,7 +67,7 @@ buildcheck: artifacts: paths: - "*.tar.gz" - expire_in: 1 week + expire_in: 1 hour unittests: stage: test @@ -86,7 +88,7 @@ unittests: x' artifacts: when: always - expire_in: 1 week + expire_in: 1 hour paths: - test-out.xml reports: @@ -107,6 +109,8 @@ longtest: - if: $RUN_LONG_TESTS == "FALSE" when: always - when: never + artifacts: + when: never pages: stage: end @@ -124,7 +128,7 @@ pages: artifacts: paths: - $PAGES_OUTDIR - expire_in: 1 week + expire_in: 1 hour Validate Inventory: stage: end @@ -139,3 +143,5 @@ Validate Inventory: --token "${GIT_TOKEN_CUSTOM}" tags: - chs-shared + artifacts: + when: never \ No newline at end of file From 13cdba8f57038d9792861d51230b27b0e078669d Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 6 May 2026 07:57:52 -0500 Subject: [PATCH 19/29] try to fix pipeline --- .gitlab-ci.yml | 6 ++++-- man/summarize_waterdata_samples.Rd | 11 +++++++---- 2 files 
changed, 11 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 00fabdb83..d9020968f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,3 @@ -image: ${CI_REGISTRY_IMAGE}:latest - workflow: rules: - if: $CI_COMMIT_TAG @@ -50,6 +48,7 @@ build-image: when: never buildcheck: + image: ${CI_REGISTRY_IMAGE}:latest stage: check cache: [] dependencies: @@ -70,6 +69,7 @@ buildcheck: expire_in: 1 hour unittests: + image: ${CI_REGISTRY_IMAGE}:latest stage: test cache: [] dependencies: @@ -99,6 +99,7 @@ unittests: coverage: '/Coverage: \d+.\d+\%/' longtest: + image: ${CI_REGISTRY_IMAGE}:latest stage: test dependencies: - build-image @@ -113,6 +114,7 @@ longtest: when: never pages: + image: ${CI_REGISTRY_IMAGE}:latest stage: end dependencies: - build-image diff --git a/man/summarize_waterdata_samples.Rd b/man/summarize_waterdata_samples.Rd index 3b5b7f17c..6afda5abd 100644 --- a/man/summarize_waterdata_samples.Rd +++ b/man/summarize_waterdata_samples.Rd @@ -10,10 +10,13 @@ summarize_waterdata_samples(monitoringLocationIdentifier) summarize_USGS_samples(monitoringLocationIdentifier) } \arguments{ -\item{monitoringLocationIdentifier}{A monitoring location identifier has two parts, -separated by a dash (-): the agency code and the location number. Location identifiers should be separated with commas, -for example: AZ014-320821110580701, CAX01-15304600, USGS-040851385. Location -numbers without an agency prefix are assumed to have the prefix USGS.} +\item{monitoringLocationIdentifier}{A single monitoring location identifier +with two parts, separated by a dash (-): the agency code and the location +number. Examples: USGS-040851385, AZ014-320821110580701, CAX01-15304600. +The summary service accepts only one site at a time; supplying a vector of +length > 1 raises an error. The agency prefix is required: bare location +numbers (e.g. 
"040851385") are accepted by the service but return an empty +result.} } \value{ data frame with summary of data available based on the monitoringLocationIdentifier From 252873c6f69ea0243ce47f6241b34d431a009606 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 6 May 2026 08:01:00 -0500 Subject: [PATCH 20/29] no artifacts --- .gitlab-ci.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d9020968f..a5073ef58 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,5 @@ +image: ${CI_REGISTRY_IMAGE}:latest + workflow: rules: - if: $CI_COMMIT_TAG @@ -45,10 +47,9 @@ build-image: -f docker/Dockerfile -t ${CI_REGISTRY_IMAGE}:latest . - docker push --all-tags ${CI_REGISTRY_IMAGE} artifacts: - when: never + paths: [] buildcheck: - image: ${CI_REGISTRY_IMAGE}:latest stage: check cache: [] dependencies: @@ -69,7 +70,6 @@ buildcheck: expire_in: 1 hour unittests: - image: ${CI_REGISTRY_IMAGE}:latest stage: test cache: [] dependencies: @@ -99,7 +99,6 @@ unittests: coverage: '/Coverage: \d+.\d+\%/' longtest: - image: ${CI_REGISTRY_IMAGE}:latest stage: test dependencies: - build-image @@ -111,10 +110,9 @@ longtest: when: always - when: never artifacts: - when: never + paths: [] pages: - image: ${CI_REGISTRY_IMAGE}:latest stage: end dependencies: - build-image @@ -146,4 +144,4 @@ Validate Inventory: tags: - chs-shared artifacts: - when: never \ No newline at end of file + paths: [] From e193c3835af422940efd2af8395c092da1fa201a Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 6 May 2026 08:04:17 -0500 Subject: [PATCH 21/29] ? --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a5073ef58..39f48b915 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,7 +47,7 @@ build-image: -f docker/Dockerfile -t ${CI_REGISTRY_IMAGE}:latest . 
- docker push --all-tags ${CI_REGISTRY_IMAGE} artifacts: - paths: [] + expire_in: 5 minutes buildcheck: stage: check @@ -110,7 +110,7 @@ longtest: when: always - when: never artifacts: - paths: [] + expire_in: 5 minutes pages: stage: end @@ -144,4 +144,4 @@ Validate Inventory: tags: - chs-shared artifacts: - paths: [] + expire_in: 5 minutes From df780f2a8f1fcf2df28f8bc4f537b024f71108b7 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 6 May 2026 08:05:32 -0500 Subject: [PATCH 22/29] formatting --- .gitlab-ci.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 39f48b915..3dfcf3492 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -131,17 +131,17 @@ pages: expire_in: 1 hour Validate Inventory: - stage: end - only: - - main - image: ${INTERNAL_REGISTRY}software/software-management:latest - script: - - software-management review - --project "${CI_PROJECT_PATH}" - --ref "${CI_COMMIT_BRANCH}" - --type "provisional" - --token "${GIT_TOKEN_CUSTOM}" - tags: - - chs-shared + stage: end + only: + - main + image: ${INTERNAL_REGISTRY}software/software-management:latest + script: + - software-management review + --project "${CI_PROJECT_PATH}" + --ref "${CI_COMMIT_BRANCH}" + --type "provisional" + --token "${GIT_TOKEN_CUSTOM}" + tags: + - chs-shared artifacts: expire_in: 5 minutes From 2bcb219e031db376bcc9005d5606949849ec20fa Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 07:51:48 -0500 Subject: [PATCH 23/29] Update tutorials/quick_intro_deck.qmd Co-authored-by: Joe Zemmels (he/him) --- tutorials/quick_intro_deck.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index d65d56d96..d9c4369d3 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -148,7 +148,7 @@ library(dataRetrieval) ### Python -Whether you are a user or developer we recommend installing 
`dataretrieval` in a virtual environment. This can be done using something like virtualenv or conda. +Whether you are a user or developer we recommend installing `dataretrieval` in a virtual environment. This can be done using something like [virtualenv](https://docs.python.org/3/tutorial/venv.html) or [conda](https://docs.conda.io/projects/conda/en/stable/user-guide/install/index.html). ```{bash} #| echo: true From 27ee7330535575154ef3afa2194c5b584acf5839 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 08:56:39 -0500 Subject: [PATCH 24/29] more cleanup --- tutorials/basic_slides_deck.qmd | 11 ++++-- tutorials/quick_intro_deck.qmd | 66 +++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index bb5893f37..961789357 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -1218,8 +1218,8 @@ discrete_data <- read_waterdata_samples( discrete_data, md_qw = waterdata.get_samples( monitoringLocationIdentifier = "USGS-11455508", usgsPCode = "00631", - activityStartDateLower = start_date, - activityStartDateUpper = end_date, + activityStartDateLower = "2024-01-01", + activityStartDateUpper = "2024-06-01", profile = "basicphyschem" ) ``` @@ -1238,9 +1238,12 @@ discrete_data, md_qw = waterdata.get_samples( ## Workflow 5: Join Discrete and Continuous +::: {.panel-tabset} + +### R ```{r} -#| code-line-numbers: "1|2-3|5|6|1-6" +#| code-line-numbers: "1|2-3|5-10" library(data.table) setDT(discrete_data)[, join_date := Activity_StartDateTime] setDT(continuous_data)[, join_date := time] @@ -1253,6 +1256,8 @@ closest_dt <- continuous_data[ closest_dt <- data.frame(closest_dt) ``` +::: + ::: footer ::: diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index d65d56d96..ed32a4d3f 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -735,16 +735,36 @@ dt_me(df, escape = 
FALSE, paging = FALSE) ## Workflow 3: Continuous data for known site {.smaller} +::: {.panel-tabset} + +### R + :::: {.columns} ::: {.column width="65%"} -::: {.panel-tabset} +```{r} +#| eval: false +site_id <- "USGS-11455508" +p_code_rt <- "99133" +start_date <- "2024-01-01" +end_date <- "2024-06-01" -### R +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) +names(continuous_data) +``` + +::: + +::: {.column width="35%"} ```{r} #| results: markup +#| echo: false site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" @@ -755,12 +775,21 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) +names(continuous_data) ``` +::: + +:::: + ### Python +:::: {.columns} + +::: {.column width="65%"} + ```{python} -#| eval: !expr evaluate_python +#| eval: false site_id = "USGS-11455508" p_code_rt = "99133" date_range = "2024-01-01/2024-06-01" @@ -772,30 +801,27 @@ continuous_data, md_cont = waterdata.get_continuous( ::: -::: - ::: {.column width="35%"} +```{python} +#| eval: !expr evaluate_python +#| results: markup +#| echo: false +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range +) ``` - [1] "monitoring_location_id" - [2] "parameter_code" - [3] "statistic_id" - [4] "time" - [5] "value" - [6] "unit_of_measure" - [7] "approval_status" - [8] "last_modified" - [9] "qualifier" -[10] "time_series_id" -``` + ::: :::: -``` -Requesting: -https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508&parameter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z -``` +::: + ## Workflow 3: Inspect From 1eb8d803df0b83d9153aba4df2257f8f9b705f7f Mon Sep 
17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 12:44:19 -0500 Subject: [PATCH 25/29] Do we slim down the Docker image if we create the enviornment only in the Quarto step? --- .gitlab-ci.yml | 1 + docker/Dockerfile | 6 ------ environment.yml | 1 + tutorials/quick_intro_deck.qmd | 11 ++++++++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3dfcf3492..dbd74e18e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,6 +119,7 @@ pages: - buildcheck script: - | + conda env create -n dataretrieval -f /environment.yml && conda clean -a -y Rscript -e ' source_file <- list.files()[grep(".tar.gz", list.files())] install.packages(source_file, type = "source", dependencies = FALSE) diff --git a/docker/Dockerfile b/docker/Dockerfile index 93422fde0..0319c1f50 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,9 +1,5 @@ FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.5 -# Disable the annoying bell on WSL2 -RUN sed -i 's/^# set bell-style none$/set bell-style none/' /etc/inputrc -RUN echo 'set visualbell' >> /root/.vimrc - # Change the name of this environment to something which pleases you, if you # so please. But the name will not be relevant for most cases, as reticulate # will be pointed to the environment no matter what it is named. 
@@ -19,8 +15,6 @@ RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/lat rm Miniforge3.sh ENV CONDA_DIR="/root/conda" ENV PATH=$CONDA_DIR/bin:$PATH -COPY environment.yml / -RUN conda env create -n ${CONDA_ENVIRONMENT_NAME} -f /environment.yml && conda clean -a -y # Necessary R libraries RUN apt-get update -qq && apt-get -y --no-install-recommends install \ diff --git a/environment.yml b/environment.yml index 2244b414b..7d0891d4e 100644 --- a/environment.yml +++ b/environment.yml @@ -33,4 +33,5 @@ dependencies: - xz-gpl-tools=5.8.1=hbcc6ac9_2 - xz-tools=5.8.1=hb9d3cd8_2 - dataretrieval + - geopandas prefix: /home/user/miniforge3/envs/dataretrieval diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 8a3a27f38..f80367aa3 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -145,10 +145,8 @@ Then each time you open R, you'll need to load the library: #| message: true library(dataRetrieval) ``` +### Python -### Python - -Whether you are a user or developer we recommend installing `dataretrieval` in a virtual environment. This can be done using something like [virtualenv](https://docs.python.org/3/tutorial/venv.html) or [conda](https://docs.conda.io/projects/conda/en/stable/user-guide/install/index.html). 
```{bash} #| echo: true @@ -765,6 +763,7 @@ names(continuous_data) ```{r} #| results: markup #| echo: false +options(width = 30) site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" @@ -807,6 +806,7 @@ continuous_data, md_cont = waterdata.get_continuous( #| eval: !expr evaluate_python #| results: markup #| echo: false +pd.set_option("display.width", 30) site_id = "USGS-11455508" p_code_rt = "99133" date_range = "2024-01-01/2024-06-01" @@ -814,6 +814,7 @@ date_range = "2024-01-01/2024-06-01" continuous_data, md_cont = waterdata.get_continuous( monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range ) +continuous_data.columns ``` ::: @@ -860,6 +861,10 @@ plt.scatter(x=continuous_data.time, y=continuous_data.value) The next slides will demo how to use those. +::: footer + +::: + ## Data Discovery: Time Series {.smaller} ```{r} From d9b148b5762c86f153978a449c266c1ae628abb9 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 13:02:36 -0500 Subject: [PATCH 26/29] try again --- .gitlab-ci.yml | 2 +- tutorials/basic_slides_deck.qmd | 72 ++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dbd74e18e..bd6a37149 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,7 +119,7 @@ pages: - buildcheck script: - | - conda env create -n dataretrieval -f /environment.yml && conda clean -a -y + conda env create -n dataretrieval -f environment.yml && conda clean -a -y Rscript -e ' source_file <- list.files()[grep(".tar.gz", list.files())] install.packages(source_file, type = "source", dependencies = FALSE) diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 961789357..5981d5c52 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -1104,15 +1104,37 @@ band_instruments |> ## Workflow 4: Continuous data for known site {.smaller} +::: {.panel-tabset} + +### R + :::: {.columns} ::: 
{.column width="65%"} -::: {.panel-tabset} +```{r} +#| eval: false +site_id <- "USGS-11455508" +p_code_rt <- "99133" +start_date <- "2024-01-01" +end_date <- "2024-06-01" -### R +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) +names(continuous_data) +``` + +::: + +::: {.column width="35%"} ```{r} +#| results: markup +#| echo: false +options(width = 30) site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" @@ -1123,50 +1145,54 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) +names(continuous_data) ``` -``` -Requesting: -https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508&parameter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z -``` +::: + +:::: ### Python +:::: {.columns} + +::: {.column width="65%"} + ```{python} -#| eval: !expr evaluate_python +#| eval: false site_id = "USGS-11455508" p_code_rt = "99133" date_range = "2024-01-01/2024-06-01" continuous_data, md_cont = waterdata.get_continuous( - monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = date_range + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range ) ``` ::: -::: - ::: {.column width="35%"} +```{python} +#| eval: !expr evaluate_python +#| results: markup +#| echo: false +pd.set_option("display.width", 30) +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range +) +continuous_data.columns ``` - [1] "monitoring_location_id" - [2] "parameter_code" - [3] "statistic_id" - [4] "time" - [5] "value" - [6] "unit_of_measure" - [7] "approval_status" - [8] "last_modified" - [9] "qualifier" -[10] "time_series_id" 
-``` + ::: :::: +::: ## Workflow 4: Inspect From 458c92ca04b8ed395e43b0aaa09c8da49c932a2d Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 13:50:21 -0500 Subject: [PATCH 27/29] pipeline work --- .gitlab-ci.yml | 1 + NEWS | 2 ++ R/read_waterdata_field_measurements.R | 7 ++++++- R/read_waterdata_field_meta.R | 2 +- docker/Dockerfile | 1 - man/read_waterdata_field_measurements.Rd | 6 +++++- 6 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bd6a37149..49a7ea711 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -32,6 +32,7 @@ variables: PAGES_OUTDIR: "$CI_PROJECT_DIR/public" CUSTOM_DR_UA: "GitLab_CI" API_USGS_PAT: "${API_USGS_PAT}" + RETICULATE_PYTHON: "$CI_PROJECT_DIR/root/conda/envs/dataretrieval/bin/python" build-image: stage: build diff --git a/NEWS b/NEWS index 3ee8bd776..7daff6a98 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ citations to possible values in read_waterdata_metadata. * Introduce an error if user inputs a numeric to any of the "time" arguments. Because it is impossible to tell if they intended Dates or POSIX, we cannot know for sure and therefore could add incorrect filters to the query. +* The "id" column that comes back from read_waterdata_field_meta was changed to +field_measurements_series_id to match the expectation of `read_waterdata_field_measurements` diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R index d0718abe6..25d822569 100644 --- a/R/read_waterdata_field_measurements.R +++ b/R/read_waterdata_field_measurements.R @@ -20,7 +20,12 @@ #' See also Details below for more information. 
#' @param qualifier `r get_ogc_params("field-measurements")$qualifier` #' @param field_visit_id `r get_ogc_params("field-measurements")$field_visit_id` -#' @param field_measurements_series_id `r get_ogc_params("field-measurements")$field_measurements_series_id` +#' @param field_measurements_series_id A unique identifier representing a single +#' collection series. This corresponds to the `field_measurements_series_id` field in the +#' `read_waterdata_field_meta` endpoint. Collection series are defined as the +#' set of field measurements at a given monitoring location for a single parameter +#' code using a single reading type. +#' #' @param vertical_datum `r get_ogc_params("field-measurements")$vertical_datum` #' @param measuring_agency `r get_ogc_params("field-measurements")$measuring_agency` #' @param control_condition `r get_ogc_params("field-measurements")$control_condition` diff --git a/R/read_waterdata_field_meta.R b/R/read_waterdata_field_meta.R index 4df6e2ac5..b45df0154 100644 --- a/R/read_waterdata_field_meta.R +++ b/R/read_waterdata_field_meta.R @@ -85,7 +85,7 @@ read_waterdata_field_meta <- function( attach_request = getOption("dataRetrieval.attach_request") ) { service <- "field-measurements-metadata" - output_id <- "field_series_id" + output_id <- "field_measurements_series_id" rlang::check_dots_empty() args <- mget(names(formals())) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0319c1f50..e8176cb33 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -43,4 +43,3 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \ r-cran-reticulate \ && rm -rf /var/lib/apt/lists/* -ENV RETICULATE_PYTHON=/root/conda/envs/${CONDA_ENVIRONMENT_NAME}/bin/python diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index 0a29719d5..b9543526c 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -50,7 +50,11 @@ The default (\code{NA}) will 
return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} -\item{field_measurements_series_id}{A unique identifier representing a single collection series. This corresponds to the \code{id} field in the \code{field-measurements-metadata} endpoint. Collection series are defined as the set of field measurements at a given monitoring location for a single parameter code using a single reading type.} +\item{field_measurements_series_id}{A unique identifier representing a single +collection series. This corresponds to the \code{field_measurements_series_id} field in the +\code{read_waterdata_field_meta} endpoint. Collection series are defined as the +set of field measurements at a given monitoring location for a single parameter +code using a single reading type.} \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. 
For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} From f9d1c71bc0a340125f1481929978ea22ef862386 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 14:15:15 -0500 Subject: [PATCH 28/29] go back --- .gitlab-ci.yml | 2 -- docker/Dockerfile | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 49a7ea711..3dfcf3492 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -32,7 +32,6 @@ variables: PAGES_OUTDIR: "$CI_PROJECT_DIR/public" CUSTOM_DR_UA: "GitLab_CI" API_USGS_PAT: "${API_USGS_PAT}" - RETICULATE_PYTHON: "$CI_PROJECT_DIR/root/conda/envs/dataretrieval/bin/python" build-image: stage: build @@ -120,7 +119,6 @@ pages: - buildcheck script: - | - conda env create -n dataretrieval -f environment.yml && conda clean -a -y Rscript -e ' source_file <- list.files()[grep(".tar.gz", list.files())] install.packages(source_file, type = "source", dependencies = FALSE) diff --git a/docker/Dockerfile b/docker/Dockerfile index e8176cb33..93422fde0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,9 @@ FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.5 +# Disable the annoying bell on WSL2 +RUN sed -i 's/^# set bell-style none$/set bell-style none/' /etc/inputrc +RUN echo 'set visualbell' >> /root/.vimrc + # Change the name of this environment to something which pleases you, if you # so please. But the name will not be relevant for most cases, as reticulate # will be pointed to the environment no matter what it is named. 
@@ -15,6 +19,8 @@ RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/lat rm Miniforge3.sh ENV CONDA_DIR="/root/conda" ENV PATH=$CONDA_DIR/bin:$PATH +COPY environment.yml / +RUN conda env create -n ${CONDA_ENVIRONMENT_NAME} -f /environment.yml && conda clean -a -y # Necessary R libraries RUN apt-get update -qq && apt-get -y --no-install-recommends install \ @@ -43,3 +49,4 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \ r-cran-reticulate \ && rm -rf /var/lib/apt/lists/* +ENV RETICULATE_PYTHON=/root/conda/envs/${CONDA_ENVIRONMENT_NAME}/bin/python From 75df7a5746810a292e31464d2b8b78406b443500 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Thu, 7 May 2026 14:21:31 -0500 Subject: [PATCH 29/29] ? --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 7d0891d4e..2244b414b 100644 --- a/environment.yml +++ b/environment.yml @@ -33,5 +33,4 @@ dependencies: - xz-gpl-tools=5.8.1=hbcc6ac9_2 - xz-tools=5.8.1=hb9d3cd8_2 - dataretrieval - - geopandas prefix: /home/user/miniforge3/envs/dataretrieval