|
3 | 3 | import warnings |
4 | 4 | from collections import defaultdict |
5 | 5 | from concurrent.futures import ThreadPoolExecutor |
| 6 | +from datetime import datetime |
6 | 7 | from functools import wraps |
7 | 8 | from operator import itemgetter |
| 9 | +from urllib.parse import urlencode |
8 | 10 | from uuid import uuid4 |
9 | 11 |
|
10 | 12 | import pandas as pd |
|
19 | 21 | ) |
20 | 22 |
|
21 | 23 | from ._logging import log_decorator |
| 24 | +from ._utils import function_translation, period_translation |
22 | 25 | from .globalsettings import environment |
23 | 26 | from .storage import Storage |
24 | 27 |
|
@@ -285,7 +288,7 @@ def delete(self, series_id): |
285 | 288 | ) |
286 | 289 |
|
287 | 290 | def _timer(func): |
288 | | - """Decorator used to log latency of the ``get`` method""" |
| 291 | + """Decorator used to log latency of the ``get`` and ``get_samples_aggregate`` methods""" |
289 | 292 |
|
290 | 293 | @wraps(func) |
291 | 294 | def wrapper(self, series_id, start=None, end=None, **kwargs): |
@@ -411,6 +414,148 @@ def get( |
411 | 414 |
|
412 | 415 | return series |
413 | 416 |
|
| 417 | + @log_decorator("exception") |
| 418 | + @_timer |
| 419 | + @log_decorator("warning") |
| 420 | + def get_samples_aggregate( |
| 421 | + self, |
| 422 | + series_id, |
| 423 | + start=None, |
| 424 | + end=None, |
| 425 | + aggregation_period=None, |
| 426 | + aggregation_function=None, |
| 427 | + max_page_size=None, |
| 428 | + ): |
| 429 | + """ |
| 430 | + Retrieve a series from DataReservoir.io using the samples/aggregate endpoint. |
| 431 | +
|
| 432 | + Parameters |
| 433 | + ---------- |
| 434 | + series_id : str |
| 435 | + Identifier of the series to download |
| 436 | + start : str or datetime-like, required |
| 437 | + Start time (inclusive) of the aggregated series, given as anything |
| 438 | + pandas.to_datetime is able to parse. Must be within the past 90 days. |
| 439 | + end : str or datetime-like, required |
| 440 | + End time (exclusive) of the aggregated series, given as anything |
| 441 | + pandas.to_datetime is able to parse. Must be within the past 90 days. |
| 442 | + aggregation_period : str, required |
| 443 | + Period over which samples are aggregated, used together with |
| 444 | + aggregation_function. Maximum 24 hours. Valid units are h, m, s, ms, |
| 445 | + microsecond and tick. Use "100ms" rather than "0.1s" for 10 Hz data. |
| 446 | + aggregation_function : str, required |
| 447 | + One of "Avg", "Min", "Max" or "Stdev". |
| 448 | + max_page_size : int, optional |
| 449 | + Maximum number of samples per page; next-page links are followed automatically and the entire series is returned. For advanced usage. |
| 450 | +
| 451 | + Returns |
| 452 | + ------- |
| 453 | + pandas.Series |
| 454 | + Series data |
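| | +
| | + Examples |
| | + -------- |
| | + A hypothetical call; ``client`` denotes an instance of this class and the |
| | + series ID is illustrative only: |
| | +
| | + >>> series = client.get_samples_aggregate( |
| | + ...     "my-series-id", |
| | + ...     start="2023-12-01", |
| | + ...     end="2023-12-02", |
| | + ...     aggregation_period="10m", |
| | + ...     aggregation_function="Avg", |
| | + ... ) |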
| 455 | + """ |
| 456 | + if not start: |
| 457 | + # Required parameter |
| 458 | + raise ValueError( |
| 459 | + "You must specify the start date in ISO 8601 format, for example 2023-12-01." |
| 460 | + ) |
| 461 | + |
| 462 | + if not end: |
| 463 | + # Required parameter |
| 464 | + raise ValueError( |
| 465 | + "You must specify the end date in ISO 8601 format, for example 2023-12-31." |
| 466 | + ) |
| 467 | + |
| 468 | + if not aggregation_period: |
| 469 | + # Required parameter |
| 470 | + raise ValueError( |
| 471 | + "Aggregation period must be an integer followed by one of these units: h, m, s, ms, microsecond or tick (or their pandas equivalents)" |
| 472 | + ) |
| 473 | + |
| 474 | + if not aggregation_function: |
| 475 | + # Required parameter |
| 476 | + raise ValueError( |
| 477 | + "Aggregation function must be one of: Avg (mean), Min, Max, Stdev (std)" |
| 478 | + ) |
| 479 | + |
| 480 | + # Translate pandas-style function and period terms to API terms |
| 481 | + # Note: the API is case-insensitive, so both "min" and "Min" work |
| 482 | + if aggregation_function in function_translation: |
| 483 | + aggregation_function = function_translation[aggregation_function] |
| 484 | + |
| 485 | + for period_unit in period_translation: |
| 486 | + if ( |
| 487 | + aggregation_period.endswith(period_unit) |
| 488 | + and aggregation_period[: -len(period_unit)].isnumeric() |
| 489 | + ): |
| 490 | + aggregation_period = ( |
| 491 | + aggregation_period[: -len(period_unit)] |
| 492 | + + period_translation[period_unit] |
| 493 | + ) |
| 494 | + break |
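| | + # Illustration (assuming period_translation maps, e.g., "min" -> "m"): |
| | + # "10min" ends with "min" and has a numeric prefix, so it becomes "10m"; |
| | + # a bare "min" has no numeric prefix and is passed through unchanged |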
| 495 | + |
| 496 | + start = pd.to_datetime(start, dayfirst=True, unit="ns", utc=True).isoformat() |
| 497 | + end = pd.to_datetime(end, dayfirst=True, unit="ns", utc=True).isoformat() |
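| | + # start/end are now timezone-aware ISO 8601 strings, |
| | + # e.g. "2023-12-01T00:00:00+00:00", ready for the query string below |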
| 498 | + |
| 499 | + params = {} |
| 500 | + |
| 501 | + if max_page_size: |
| 502 | + params["maxPageSize"] = max_page_size |
| 503 | + |
| 504 | + params["aggregationPeriod"] = aggregation_period |
| 505 | + params["aggregationFunction"] = aggregation_function |
| 506 | + params["start"] = start |
| 507 | + params["end"] = end |
| 508 | + |
| 509 | + next_page_link = f"{environment.api_base_url}reservoir/timeseries/{series_id}/samples/aggregate?{urlencode(params)}" |
| 510 | + |
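| | + # Seed an empty, explicitly typed frame so that the concat and squeeze below |
| | + # still yield a well-typed (possibly empty) series when no samples are returned |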
| 511 | + df = ( |
| 512 | + pd.DataFrame(columns=("index", "values")) |
| 513 | + .astype({"index": "int64"}) |
| 514 | + .astype({"values": "float64"}, errors="ignore") |
| 515 | + ) |
| 516 | + |
| 517 | + @retry( |
| 518 | + stop=stop_after_attempt( |
| 519 | + 4 |
| 520 | + ), # Total attempts, not retries: 4 attempts means at most 3 retries |
| 521 | + retry=retry_if_exception_type( |
| 522 | + ( |
| 523 | + ConnectionError, |
| 524 | + requests.exceptions.ChunkedEncodingError, |
| 525 | + requests.ReadTimeout, |
| 526 | + ConnectionRefusedError, |
| 527 | + requests.ConnectionError, |
| 528 | + ) |
| 529 | + ), |
| 530 | + wait=wait_chain(wait_fixed(0.1), wait_fixed(0.5), wait_fixed(30)), |
| 531 | + ) |
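| | + # Only transient network errors are retried; HTTP error statuses are |
| | + # surfaced via raise_for_status() in the paging loop below |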
| 532 | + def get_samples_aggregate_page(url): |
| 533 | + return self._auth_session.get( |
| 534 | + url, |
| 535 | + timeout=_TIMEOUT_DEAULT, |
| 536 | + ) |
| 537 | + |
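| | + # Each page is expected to follow the OData shape consumed below: |
| | + # {"value": [{"Timestamp": "...", "Value": ...}, ...], |
| | + #  "@odata.nextLink": "<url of the next page, absent on the last page>"} |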
| 538 | + while next_page_link: |
| 539 | + response = get_samples_aggregate_page(next_page_link) |
| 540 | + response.raise_for_status() |
| 541 | + response_json = response.json() |
| 542 | + next_page_link = response_json.get("@odata.nextLink", None) |
| 543 | + |
| 544 | + content = [ |
| 545 | + (pd.to_datetime(sample["Timestamp"], utc=True), sample["Value"]) |
| 546 | + for sample in response_json["value"] |
| 547 | + ] |
| 548 | + |
| 549 | + new_df = pd.DataFrame( |
| 550 | + content, columns=("index", "values"), copy=False |
| 551 | + ).astype({"values": "float64"}, errors="ignore") |
| 552 | + |
| 553 | + df = pd.concat([df, new_df]) |
| 554 | + |
| 555 | + series = df.set_index("index").squeeze("columns").copy(deep=True) |
| 556 | + |
| 557 | + return series |
| 558 | + |
414 | 559 | def set_metadata( |
415 | 560 | self, |
416 | 561 | series_id, |
|