Skip to content

Commit c5a3c9e

Browse files
authored
ux: Provide helpful link to documentation when error due to missing API token (#1364)
1 parent 40f5ea2 commit c5a3c9e

7 files changed

Lines changed: 168 additions & 87 deletions

File tree

openml/_api_calls.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .__version__ import __version__
2525
from .exceptions import (
2626
OpenMLHashException,
27+
OpenMLNotAuthorizedError,
2728
OpenMLServerError,
2829
OpenMLServerException,
2930
OpenMLServerNoResult,
@@ -36,6 +37,8 @@
3637
FILE_ELEMENTS_TYPE = Dict[str, Union[str, Tuple[str, str]]]
3738
DATABASE_CONNECTION_ERRCODE = 107
3839

40+
API_TOKEN_HELP_LINK = "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication" # noqa: S105
41+
3942

4043
def _robot_delay(n: int) -> float:
4144
wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60
@@ -456,21 +459,28 @@ def __parse_server_exception(
456459
url: str,
457460
file_elements: FILE_ELEMENTS_TYPE | None,
458461
) -> OpenMLServerError:
459-
if response.status_code == 414:
462+
if response.status_code == requests.codes.URI_TOO_LONG:
460463
raise OpenMLServerError(f"URI too long! ({url})")
461464

465+
# OpenML has a sophisticated error system where information about failures is provided
466+
# in the response body itself.
467+
# First, we need to parse it out.
462468
try:
463469
server_exception = xmltodict.parse(response.text)
464470
except xml.parsers.expat.ExpatError as e:
465471
raise e
466472
except Exception as e:
467-
# OpenML has a sophisticated error system
468-
# where information about failures is provided. try to parse this
473+
# If we failed to parse it out, then something has gone wrong in the body that was sent back
474+
# from the server and there is little extra information we can capture.
469475
raise OpenMLServerError(
470476
f"Unexpected server error when calling {url}. Please contact the developers!\n"
471477
f"Status code: {response.status_code}\n{response.text}",
472478
) from e
473479

480+
# Now we can parse out the specific error codes that we return. These
481+
# are in addition to the typical HTTP error codes, but encode more
482+
# specific information. You can find these codes here:
483+
# https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
474484
server_error = server_exception["oml:error"]
475485
code = int(server_error["oml:code"])
476486
message = server_error["oml:message"]
@@ -496,4 +506,21 @@ def __parse_server_exception(
496506
)
497507
else:
498508
full_message = f"{message} - {additional_information}"
509+
510+
if code in [
511+
102, # flow/exists post
512+
137, # dataset post
513+
350, # dataset/42 delete
514+
310, # flow/<something> post
515+
320, # flow/42 delete
516+
400, # run/42 delete
517+
460, # task/42 delete
518+
]:
519+
msg = (
520+
f"The API call {url} requires authentication via an API key.\nPlease configure "
521+
"OpenML-Python to use your API as described in this example:"
522+
"\nhttps://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication"
523+
)
524+
return OpenMLNotAuthorizedError(message=msg)
525+
499526
return OpenMLServerException(code=code, message=full_message, url=url)

openml/config.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
import platform
1111
import shutil
1212
import warnings
13+
from contextlib import contextmanager
1314
from io import StringIO
1415
from pathlib import Path
15-
from typing import Any, cast
16+
from typing import Any, Iterator, cast
1617
from typing_extensions import Literal, TypedDict
1718
from urllib.parse import urlparse
1819

@@ -174,11 +175,11 @@ def get_server_base_url() -> str:
174175
apikey: str = _defaults["apikey"]
175176
show_progress: bool = _defaults["show_progress"]
176177
# The current cache directory (without the server name)
177-
_root_cache_directory = Path(_defaults["cachedir"])
178+
_root_cache_directory: Path = Path(_defaults["cachedir"])
178179
avoid_duplicate_runs = _defaults["avoid_duplicate_runs"]
179180

180-
retry_policy = _defaults["retry_policy"]
181-
connection_n_retries = _defaults["connection_n_retries"]
181+
retry_policy: Literal["human", "robot"] = _defaults["retry_policy"]
182+
connection_n_retries: int = _defaults["connection_n_retries"]
182183

183184

184185
def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None:
@@ -497,6 +498,18 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None:
497498
stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example
498499

499500

501+
@contextmanager
502+
def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]:
503+
"""A context manager to temporarily override variables in the configuration."""
504+
existing_config = get_config_as_dict()
505+
merged_config = {**existing_config, **config}
506+
507+
_setup(merged_config) # type: ignore
508+
yield merged_config # type: ignore
509+
510+
_setup(existing_config)
511+
512+
500513
__all__ = [
501514
"get_cache_directory",
502515
"set_root_cache_directory",

openml/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool:
234234
" please open an issue at: https://github.com/openml/openml/issues/new"
235235
),
236236
) from e
237-
raise
237+
raise e
238238

239239

240240
@overload

tests/conftest.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# License: BSD 3-Clause
2424
from __future__ import annotations
2525

26+
from collections.abc import Iterator
2627
import logging
2728
import os
2829
import shutil
@@ -195,55 +196,90 @@ def pytest_addoption(parser):
195196
def _expected_static_cache_state(root_dir: Path) -> list[Path]:
196197
_c_root_dir = root_dir / "org" / "openml" / "test"
197198
res_paths = [root_dir, _c_root_dir]
198-
199+
199200
for _d in ["datasets", "tasks", "runs", "setups"]:
200201
res_paths.append(_c_root_dir / _d)
201202

202-
for _id in ["-1","2"]:
203+
for _id in ["-1", "2"]:
203204
tmp_p = _c_root_dir / "datasets" / _id
204-
res_paths.extend([
205-
tmp_p / "dataset.arff",
206-
tmp_p / "features.xml",
207-
tmp_p / "qualities.xml",
208-
tmp_p / "description.xml",
209-
])
205+
res_paths.extend(
206+
[
207+
tmp_p / "dataset.arff",
208+
tmp_p / "features.xml",
209+
tmp_p / "qualities.xml",
210+
tmp_p / "description.xml",
211+
]
212+
)
210213

211214
res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq")
212215
res_paths.append(_c_root_dir / "runs" / "1" / "description.xml")
213216
res_paths.append(_c_root_dir / "setups" / "1" / "description.xml")
214-
217+
215218
for _id in ["1", "3", "1882"]:
216219
tmp_p = _c_root_dir / "tasks" / _id
217-
res_paths.extend([
218-
tmp_p / "datasplits.arff",
219-
tmp_p / "task.xml",
220-
])
221-
220+
res_paths.extend(
221+
[
222+
tmp_p / "datasplits.arff",
223+
tmp_p / "task.xml",
224+
]
225+
)
226+
222227
return res_paths
223228

224229

225230
def assert_static_test_cache_correct(root_dir: Path) -> None:
226231
for p in _expected_static_cache_state(root_dir):
227-
assert p.exists(), f"Expected path {p} does not exist"
228-
232+
assert p.exists(), f"Expected path {p} exists"
233+
229234

230235
@pytest.fixture(scope="class")
231236
def long_version(request):
232237
request.cls.long_version = request.config.getoption("--long")
233238

234239

235-
@pytest.fixture()
240+
@pytest.fixture(scope="session")
236241
def test_files_directory() -> Path:
237242
return Path(__file__).parent / "files"
238243

239244

240-
@pytest.fixture()
245+
@pytest.fixture(scope="session")
241246
def test_api_key() -> str:
242247
return "c0c42819af31e706efe1f4b88c23c6c1"
243248

244249

245-
@pytest.fixture(autouse=True)
246-
def verify_cache_state(test_files_directory) -> None:
250+
@pytest.fixture(autouse=True, scope="function")
251+
def verify_cache_state(test_files_directory) -> Iterator[None]:
247252
assert_static_test_cache_correct(test_files_directory)
248253
yield
249254
assert_static_test_cache_correct(test_files_directory)
255+
256+
257+
@pytest.fixture(autouse=True, scope="session")
258+
def as_robot() -> Iterator[None]:
259+
policy = openml.config.retry_policy
260+
n_retries = openml.config.connection_n_retries
261+
openml.config.set_retry_policy("robot", n_retries=20)
262+
yield
263+
openml.config.set_retry_policy(policy, n_retries)
264+
265+
266+
@pytest.fixture(autouse=True, scope="session")
267+
def with_test_server():
268+
openml.config.start_using_configuration_for_example()
269+
yield
270+
openml.config.stop_using_configuration_for_example()
271+
272+
273+
@pytest.fixture(autouse=True)
274+
def with_test_cache(test_files_directory, request):
275+
if not test_files_directory.exists():
276+
raise ValueError(
277+
f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
278+
)
279+
_root_cache_directory = openml.config._root_cache_directory
280+
tmp_cache = test_files_directory / request.node.name
281+
openml.config.set_root_cache_directory(tmp_cache)
282+
yield
283+
openml.config.set_root_cache_directory(_root_cache_directory)
284+
if tmp_cache.exists():
285+
shutil.rmtree(tmp_cache)

tests/test_evaluations/test_evaluations_example.py

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,47 @@
33

44
import unittest
55

6+
from openml.config import overwrite_config_context
7+
68

79
class TestEvaluationsExample(unittest.TestCase):
810
def test_example_python_paper(self):
911
# Example script which will appear in the upcoming OpenML-Python paper
1012
# This test ensures that the example will keep running!
11-
12-
import matplotlib.pyplot as plt
13-
import numpy as np
14-
15-
import openml
16-
17-
df = openml.evaluations.list_evaluations_setups(
18-
"predictive_accuracy",
19-
flows=[8353],
20-
tasks=[6],
21-
output_format="dataframe",
22-
parameters_in_separate_columns=True,
23-
) # Choose an SVM flow, for example 8353, and a task.
24-
25-
hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
26-
df[hp_names] = df[hp_names].astype(float).apply(np.log)
27-
C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]
28-
29-
cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
30-
plt.colorbar(cntr, label="accuracy")
31-
plt.xlim((min(C), max(C)))
32-
plt.ylim((min(gamma), max(gamma)))
33-
plt.xlabel("C (log10)", size=16)
34-
plt.ylabel("gamma (log10)", size=16)
35-
plt.title("SVM performance landscape", size=20)
36-
37-
plt.tight_layout()
13+
with overwrite_config_context(
14+
{
15+
"server": "https://www.openml.org/api/v1/xml",
16+
"apikey": None,
17+
}
18+
):
19+
import matplotlib.pyplot as plt
20+
import numpy as np
21+
22+
import openml
23+
24+
df = openml.evaluations.list_evaluations_setups(
25+
"predictive_accuracy",
26+
flows=[8353],
27+
tasks=[6],
28+
output_format="dataframe",
29+
parameters_in_separate_columns=True,
30+
) # Choose an SVM flow, for example 8353, and a task.
31+
32+
assert len(df) > 0, (
33+
"No evaluation found for flow 8353 on task 6, could "
34+
"be that this task is not available on the test server."
35+
)
36+
37+
hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
38+
df[hp_names] = df[hp_names].astype(float).apply(np.log)
39+
C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]
40+
41+
cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
42+
plt.colorbar(cntr, label="accuracy")
43+
plt.xlim((min(C), max(C)))
44+
plt.ylim((min(gamma), max(gamma)))
45+
plt.xlabel("C (log10)", size=16)
46+
plt.ylabel("gamma (log10)", size=16)
47+
plt.title("SVM performance landscape", size=20)
48+
49+
plt.tight_layout()

tests/test_openml/test_api_calls.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
import pytest
1010

1111
import openml
12+
from openml.config import ConfigurationForExamples
1213
import openml.testing
13-
from openml._api_calls import _download_minio_bucket
14+
from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK
1415

1516

1617
class TestConfig(openml.testing.TestBase):
@@ -99,3 +100,26 @@ def test_download_minio_failure(mock_minio, tmp_path: Path) -> None:
99100

100101
with pytest.raises(ValueError):
101102
_download_minio_bucket(source=some_url, destination=tmp_path)
103+
104+
105+
@pytest.mark.parametrize(
106+
"endpoint, method",
107+
[
108+
# https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
109+
("flow/exists", "post"), # 102
110+
("dataset", "post"), # 137
111+
("dataset/42", "delete"), # 350
112+
# ("flow/owned", "post"), # 310 - Couldn't find what would trigger this
113+
("flow/42", "delete"), # 320
114+
("run/42", "delete"), # 400
115+
("task/42", "delete"), # 460
116+
],
117+
)
118+
def test_authentication_endpoints_requiring_api_key_show_relevant_help_link(
119+
endpoint: str,
120+
method: str,
121+
) -> None:
122+
# We need to temporarily disable the API key to test the error message
123+
with openml.config.overwrite_config_context({"apikey": None}):
124+
with pytest.raises(openml.exceptions.OpenMLNotAuthorizedError, match=API_TOKEN_HELP_LINK):
125+
openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None)

tests/test_utils/test_utils.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,37 +8,6 @@
88
from openml.testing import _check_dataset
99

1010

11-
@pytest.fixture(autouse=True)
12-
def as_robot():
13-
policy = openml.config.retry_policy
14-
n_retries = openml.config.connection_n_retries
15-
openml.config.set_retry_policy("robot", n_retries=20)
16-
yield
17-
openml.config.set_retry_policy(policy, n_retries)
18-
19-
20-
@pytest.fixture(autouse=True)
21-
def with_test_server():
22-
openml.config.start_using_configuration_for_example()
23-
yield
24-
openml.config.stop_using_configuration_for_example()
25-
26-
27-
@pytest.fixture(autouse=True)
28-
def with_test_cache(test_files_directory, request):
29-
if not test_files_directory.exists():
30-
raise ValueError(
31-
f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
32-
)
33-
_root_cache_directory = openml.config._root_cache_directory
34-
tmp_cache = test_files_directory / request.node.name
35-
openml.config.set_root_cache_directory(tmp_cache)
36-
yield
37-
openml.config.set_root_cache_directory(_root_cache_directory)
38-
if tmp_cache.exists():
39-
shutil.rmtree(tmp_cache)
40-
41-
4211
@pytest.fixture()
4312
def min_number_tasks_on_test_server() -> int:
4413
"""After a reset at least 1068 tasks are on the test server"""

0 commit comments

Comments
 (0)