Skip to content

Commit b3fbfa2

Browse files
refactor advanced examples
1 parent 2a74c9a commit b3fbfa2

23 files changed

Lines changed: 731 additions & 152 deletions

examples/Advanced/README.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

examples/Advanced/configure_logging.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# %% [markdown]
2-
# # Logging
32
# This tutorial explains openml-python logging, and shows how to configure it.
43
# Openml-python uses the [Python logging module](https://docs.python.org/3/library/logging.html)
54
# to provide users with log messages. Each log message is assigned a level of importance, see
@@ -49,5 +48,3 @@
4948
# * 0: `logging.WARNING` and up.
5049
# * 1: `logging.INFO` and up.
5150
# * 2: `logging.DEBUG` and up (i.e. all messages).
52-
#
53-
# License: BSD 3-Clause

examples/Advanced/create_upload_tutorial.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# %% [markdown]
2-
# # Dataset upload tutorial
32
# A tutorial on how to create and upload a dataset to OpenML.
43

54
# %%
@@ -11,10 +10,6 @@
1110
import openml
1211
from openml.datasets.functions import create_dataset
1312

14-
# %% [markdown]
15-
# .. warning::
16-
# .. include:: ../../test_server_usage_warning.txt
17-
1813
# %%
1914
openml.config.start_using_configuration_for_example()
2015

@@ -308,4 +303,3 @@
308303

309304
# %%
310305
openml.config.stop_using_configuration_for_example()
311-
# License: BSD 3-Clause

examples/Advanced/datasets_tutorial.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# %% [markdown]
2-
# # Datasets
32
# How to list and download datasets.
43

54
import pandas as pd
@@ -46,8 +45,7 @@
4645

4746
# Print a summary
4847
print(
49-
f"This is dataset '{dataset.name}', the target feature is "
50-
f"'{dataset.default_target_attribute}'"
48+
f"This is dataset '{dataset.name}', the target feature is '{dataset.default_target_attribute}'"
5149
)
5250
print(f"URL: {dataset.url}")
5351
print(dataset.description[:500])
@@ -106,9 +104,6 @@
106104
# %% [markdown]
107105
# ## Edit a created dataset
108106
# This example uses the test server, to avoid editing a dataset on the main server.
109-
#
110-
# .. warning::
111-
# .. include:: ../../test_server_usage_warning.txt
112107

113108
# %%
114109
openml.config.start_using_configuration_for_example()
@@ -165,4 +160,3 @@
165160

166161
# %%
167162
openml.config.stop_using_configuration_for_example()
168-
# License: BSD 3-Clauses

examples/Advanced/fetch_evaluations_tutorial.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
# %% [markdown]
2-
# # Fetching Evaluations
3-
42
# Evaluations contain a concise summary of the results of all runs made. Each evaluation
53
# provides information on the dataset used, the flow applied, the setup used, the metric
64
# evaluated, and the result obtained on the metric, for each such run made. These collection
@@ -27,9 +25,7 @@
2725
# We shall retrieve a small set (only 10 entries) to test the listing function for evaluations
2826

2927
# %%
30-
openml.evaluations.list_evaluations(
31-
function="predictive_accuracy", size=10
32-
)
28+
openml.evaluations.list_evaluations(function="predictive_accuracy", size=10)
3329

3430
# Using other evaluation metrics, 'precision' in this case
3531
evals = openml.evaluations.list_evaluations(
@@ -182,6 +178,4 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"):
182178
function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True
183179
)
184180

185-
print(evals_setups.head(10))
186-
187-
# License: BSD 3-Clause
181+
print(evals_setups.head(10))

examples/Advanced/study_tutorial.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# %% [markdown]
2-
# # Benchmark studies
32
# How to list, download and upload benchmark studies.
43
# In contrast to
54
# [benchmark suites](https://docs.openml.org/benchmark/#benchmarking-suites) which
@@ -13,7 +12,6 @@
1312

1413
import openml
1514

16-
1715
# %% [markdown]
1816
# ## Listing studies
1917
#
@@ -22,14 +20,12 @@
2220
# easier-to-work-with data structure
2321

2422
# %%
25-
studies = openml.study.list_studies(output_format="dataframe", status="all")
23+
studies = openml.study.list_studies(status="all")
2624
print(studies.head(n=10))
2725

2826

2927
# %% [markdown]
3028
# ## Downloading studies
31-
32-
# %% [markdown]
3329
# This is done based on the study ID.
3430

3531
# %%
@@ -62,9 +58,6 @@
6258

6359
# %% [markdown]
6460
# We'll use the test server for the rest of this tutorial.
65-
#
66-
# .. warning::
67-
# .. include:: ../../test_server_usage_warning.txt
6861

6962
# %%
7063
openml.config.start_using_configuration_for_example()
@@ -76,7 +69,20 @@
7669
# In this examples we'll create a few runs for the OpenML-100 benchmark
7770
# suite which is available on the OpenML test server.
7871

72+
# <div class="admonition warning">
73+
# <p class="admonition-title">Warning</p>
74+
# <p>
75+
# For the rest of this tutorial, we will require the `openml-sklearn` package.
76+
# Install it with `pip install openml-sklearn`.
77+
# </p>
78+
# </div>
79+
7980
# %%
81+
# Get sklearn extension to run sklearn models easily on OpenML tasks.
82+
from openml_sklearn import SklearnExtension
83+
84+
extension = SklearnExtension()
85+
8086
# Model to be used
8187
clf = RandomForestClassifier()
8288

@@ -112,4 +118,3 @@
112118

113119
# %%
114120
openml.config.stop_using_configuration_for_example()
115-
# License: BSD 3-Clause

examples/Advanced/suites_tutorial.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,5 @@
11
# %% [markdown]
2-
# # Benchmark suites
3-
#
42
# How to list, download and upload benchmark suites.
5-
#
6-
# If you want to learn more about benchmark suites, check out our
7-
# brief introductory tutorial ["Simple suites tutorial"](../Basics/simple_suites_tutorial) or the
8-
# [OpenML benchmark docs](https://docs.openml.org/benchmark/#benchmarking-suites).
93

104
# %%
115
import uuid
@@ -14,7 +8,6 @@
148

159
import openml
1610

17-
1811
# %% [markdown]
1912
# ## Listing suites
2013
#
@@ -23,13 +16,11 @@
2316
# easier-to-work-with data structure
2417

2518
# %%
26-
suites = openml.study.list_suites(output_format="dataframe", status="all")
19+
suites = openml.study.list_suites(status="all")
2720
print(suites.head(n=10))
2821

2922
# %% [markdown]
3023
# ## Downloading suites
31-
32-
# %% [markdown]
3324
# This is done based on the dataset ID.
3425

3526
# %%
@@ -52,7 +43,7 @@
5243
# And we can use the task listing functionality to learn more about them:
5344

5445
# %%
55-
tasks = openml.tasks.list_tasks(output_format="dataframe")
46+
tasks = openml.tasks.list_tasks()
5647

5748
# %% [markdown]
5849
# Using ``@`` in
@@ -65,9 +56,6 @@
6556

6657
# %% [markdown]
6758
# We'll use the test server for the rest of this tutorial.
68-
#
69-
# .. warning::
70-
# .. include:: ../../test_server_usage_warning.txt
7159

7260
# %%
7361
openml.config.start_using_configuration_for_example()
@@ -83,7 +71,7 @@
8371
# the test server:
8472

8573
# %%
86-
all_tasks = list(openml.tasks.list_tasks(output_format="dataframe")["tid"])
74+
all_tasks = list(openml.tasks.list_tasks()["tid"])
8775
task_ids_for_suite = sorted(np.random.choice(all_tasks, replace=False, size=20))
8876

8977
# The study needs a machine-readable and unique alias. To obtain this,
@@ -102,4 +90,3 @@
10290

10391
# %%
10492
openml.config.stop_using_configuration_for_example()
105-
# License: BSD 3-Clause

examples/Advanced/task_manual_iteration_tutorial.py

Lines changed: 13 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,5 @@
11
# %% [markdown]
2-
# # Tasks: retrieving splits
3-
4-
# Tasks define a target and a train/test split. Normally, they are the input to the function
5-
# ``openml.runs.run_model_on_task`` which automatically runs the model on all splits of the task.
6-
# However, sometimes it is necessary to manually split a dataset to perform experiments outside of
7-
# the functions provided by OpenML. One such example is in the benchmark library
8-
# [HPOBench](https://github.com/automl/HPOBench) which extensively uses data from OpenML,
9-
# but not OpenML's functionality to conduct runs.
10-
2+
# Tasks define a target and a train/test split, which we can use for benchmarking.
113

124
# %%
135
import openml
@@ -45,12 +37,7 @@
4537

4638
# %%
4739
print(
48-
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
49-
task_id,
50-
n_repeats,
51-
n_folds,
52-
n_samples,
53-
)
40+
f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
5441
)
5542

5643
# %% [markdown]
@@ -72,19 +59,14 @@
7259
# And then split the data based on this:
7360

7461
# %%
75-
X, y = task.get_X_and_y(dataset_format="dataframe")
62+
X, y = task.get_X_and_y()
7663
X_train = X.iloc[train_indices]
7764
y_train = y.iloc[train_indices]
7865
X_test = X.iloc[test_indices]
7966
y_test = y.iloc[test_indices]
8067

8168
print(
82-
"X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format(
83-
X_train.shape,
84-
y_train.shape,
85-
X_test.shape,
86-
y_test.shape,
87-
)
69+
f"X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}, X_test.shape: {X_test.shape}, y_test.shape: {y_test.shape}"
8870
)
8971

9072
# %% [markdown]
@@ -96,12 +78,7 @@
9678
X, y = task.get_X_and_y()
9779
n_repeats, n_folds, n_samples = task.get_split_dimensions()
9880
print(
99-
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
100-
task_id,
101-
n_repeats,
102-
n_folds,
103-
n_samples,
104-
)
81+
f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
10582
)
10683

10784
# %% [markdown]
@@ -122,16 +99,8 @@
12299
y_test = y.iloc[test_indices]
123100

124101
print(
125-
"Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
126-
"y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
127-
repeat_idx,
128-
fold_idx,
129-
sample_idx,
130-
X_train.shape,
131-
y_train.shape,
132-
X_test.shape,
133-
y_test.shape,
134-
)
102+
f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
103+
f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
135104
)
136105

137106
# %% [markdown]
@@ -143,12 +112,7 @@
143112
X, y = task.get_X_and_y()
144113
n_repeats, n_folds, n_samples = task.get_split_dimensions()
145114
print(
146-
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
147-
task_id,
148-
n_repeats,
149-
n_folds,
150-
n_samples,
151-
)
115+
f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
152116
)
153117

154118
# %% [markdown]
@@ -169,16 +133,8 @@
169133
y_test = y.iloc[test_indices]
170134

171135
print(
172-
"Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
173-
"y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
174-
repeat_idx,
175-
fold_idx,
176-
sample_idx,
177-
X_train.shape,
178-
y_train.shape,
179-
X_test.shape,
180-
y_test.shape,
181-
)
136+
f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
137+
f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
182138
)
183139

184140
# %% [markdown]
@@ -190,12 +146,7 @@
190146
X, y = task.get_X_and_y()
191147
n_repeats, n_folds, n_samples = task.get_split_dimensions()
192148
print(
193-
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
194-
task_id,
195-
n_repeats,
196-
n_folds,
197-
n_samples,
198-
)
149+
f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
199150
)
200151

201152
# %% [markdown]
@@ -216,15 +167,6 @@
216167
y_test = y.iloc[test_indices]
217168

218169
print(
219-
"Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
220-
"y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
221-
repeat_idx,
222-
fold_idx,
223-
sample_idx,
224-
X_train.shape,
225-
y_train.shape,
226-
X_test.shape,
227-
y_test.shape,
228-
)
170+
f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
171+
f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
229172
)
230-
# License: BSD 3-Clause

0 commit comments

Comments (0)