openml
diff --git a/‎examples/Advanced/README.txt‎
Lines changed: 0 additions & 4 deletions b/‎examples/Advanced/README.txt‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎examples/Advanced/configure_logging.py‎
Lines changed: 0 additions & 3 deletions b/‎examples/Advanced/configure_logging.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎examples/Advanced/create_upload_tutorial.py‎
Lines changed: 0 additions & 6 deletions b/‎examples/Advanced/create_upload_tutorial.py‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎examples/Advanced/datasets_tutorial.py‎
Lines changed: 1 addition & 7 deletions b/‎examples/Advanced/datasets_tutorial.py‎
Lines changed: 1 addition & 7 deletions
diff --git a/‎examples/Advanced/fetch_evaluations_tutorial.py‎
Lines changed: 2 additions & 8 deletions b/‎examples/Advanced/fetch_evaluations_tutorial.py‎
Lines changed: 2 additions & 8 deletions
diff --git a/‎examples/Advanced/study_tutorial.py‎
Lines changed: 14 additions & 9 deletions b/‎examples/Advanced/study_tutorial.py‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎examples/Advanced/suites_tutorial.py‎
Lines changed: 3 additions & 16 deletions b/‎examples/Advanced/suites_tutorial.py‎
Lines changed: 3 additions & 16 deletions
diff --git a/‎examples/Advanced/task_manual_iteration_tutorial.py‎
Lines changed: 13 additions & 71 deletions b/‎examples/Advanced/task_manual_iteration_tutorial.py‎
Lines changed: 13 additions & 71 deletions
@@ -1,5 +1,4 @@
 # %% [markdown]
-# # Logging
 # This tutorial explains openml-python logging, and shows how to configure it.
 # Openml-python uses the [Python logging module](https://docs.python.org/3/library/logging.html)
 # to provide users with log messages. Each log message is assigned a level of importance, see
@@ -49,5 +48,3 @@
 # * 0: `logging.WARNING` and up.
 # * 1: `logging.INFO` and up.
 # * 2: `logging.DEBUG` and up (i.e. all messages).
-#
-# License: BSD 3-Clause
@@ -1,5 +1,4 @@
 # %% [markdown]
-# # Dataset upload tutorial
 # A tutorial on how to create and upload a dataset to OpenML.
 
 # %%
@@ -11,10 +10,6 @@
 import openml
 from openml.datasets.functions import create_dataset
 
-# %% [markdown]
-# .. warning::
-#    .. include:: ../../test_server_usage_warning.txt
-
 # %%
 openml.config.start_using_configuration_for_example()
 
@@ -308,4 +303,3 @@
 
 # %%
 openml.config.stop_using_configuration_for_example()
-# License: BSD 3-Clause
@@ -1,5 +1,4 @@
 # %% [markdown]
-# # Datasets
 # How to list and download datasets.
 
 import pandas as pd
@@ -46,8 +45,7 @@
 
 # Print a summary
 print(
-    f"This is dataset '{dataset.name}', the target feature is "
-    f"'{dataset.default_target_attribute}'"
+    f"This is dataset '{dataset.name}', the target feature is '{dataset.default_target_attribute}'"
 )
 print(f"URL: {dataset.url}")
 print(dataset.description[:500])
@@ -106,9 +104,6 @@
 # %% [markdown]
 # ## Edit a created dataset
 # This example uses the test server, to avoid editing a dataset on the main server.
-#
-# .. warning::
-#    .. include:: ../../test_server_usage_warning.txt
 
 # %%
 openml.config.start_using_configuration_for_example()
@@ -165,4 +160,3 @@
 
 # %%
 openml.config.stop_using_configuration_for_example()
-# License: BSD 3-Clauses
@@ -1,6 +1,4 @@
 # %% [markdown]
-# # Fetching Evaluations
-
 # Evaluations contain a concise summary of the results of all runs made. Each evaluation
 # provides information on the dataset used, the flow applied, the setup used, the metric
 # evaluated, and the result obtained on the metric, for each such run made. These collection
@@ -27,9 +25,7 @@
 # We shall retrieve a small set (only 10 entries) to test the listing function for evaluations
 
 # %%
-openml.evaluations.list_evaluations(
-    function="predictive_accuracy", size=10
-)
+openml.evaluations.list_evaluations(function="predictive_accuracy", size=10)
 
 # Using other evaluation metrics, 'precision' in this case
 evals = openml.evaluations.list_evaluations(
@@ -182,6 +178,4 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"):
     function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True
 )
 
-print(evals_setups.head(10))
-
-# License: BSD 3-Clause
+print(evals_setups.head(10))
@@ -1,5 +1,4 @@
 # %% [markdown]
-# # Benchmark studies
 # How to list, download and upload benchmark studies.
 # In contrast to
 # [benchmark suites](https://docs.openml.org/benchmark/#benchmarking-suites) which
@@ -13,7 +12,6 @@
 
 import openml
 
-
 # %% [markdown]
 # ##  Listing studies
 #
@@ -22,14 +20,12 @@
 #   easier-to-work-with data structure
 
 # %%
-studies = openml.study.list_studies(output_format="dataframe", status="all")
+studies = openml.study.list_studies(status="all")
 print(studies.head(n=10))
 
 
 # %% [markdown]
 # ## Downloading studies
-
-# %% [markdown]
 # This is done based on the study ID.
 
 # %%
@@ -62,9 +58,6 @@
 
 # %% [markdown]
 # We'll use the test server for the rest of this tutorial.
-#
-# .. warning::
-#    .. include:: ../../test_server_usage_warning.txt
 
 # %%
 openml.config.start_using_configuration_for_example()
@@ -76,7 +69,20 @@
 # In this examples we'll create a few runs for the OpenML-100 benchmark
 # suite which is available on the OpenML test server.
 
+# <div class="admonition warning">
+#     <p class="admonition-title">Warning</p>
+#     <p>
+#         For the rest of this tutorial, we will require the <code>openml-sklearn</code> package.
+#         Install it with <code>pip install openml-sklearn</code>.
+#     </p>
+# </div>
+
 # %%
+# Get sklearn extension to run sklearn models easily on OpenML tasks.
+from openml_sklearn import SklearnExtension
+
+extension = SklearnExtension()
+
 # Model to be used
 clf = RandomForestClassifier()
 
@@ -112,4 +118,3 @@
 
 # %%
 openml.config.stop_using_configuration_for_example()
-# License: BSD 3-Clause
 
@@ -1,11 +1,5 @@
 # %% [markdown]
-# # Benchmark suites
-#
 # How to list, download and upload benchmark suites.
-#
-# If you want to learn more about benchmark suites, check out our
-# brief introductory tutorial ["Simple suites tutorial"](../Basics/simple_suites_tutorial) or the
-# [OpenML benchmark docs](https://docs.openml.org/benchmark/#benchmarking-suites).
 
 # %%
 import uuid
@@ -14,7 +8,6 @@
 
 import openml
 
-
 # %% [markdown]
 # ## Listing suites
 #
@@ -23,13 +16,11 @@
 #   easier-to-work-with data structure
 
 # %%
-suites = openml.study.list_suites(output_format="dataframe", status="all")
+suites = openml.study.list_suites(status="all")
 print(suites.head(n=10))
 
 # %% [markdown]
 # ## Downloading suites
-
-# %% [markdown]
 # This is done based on the dataset ID.
 
 # %%
@@ -52,7 +43,7 @@
 # And we can use the task listing functionality to learn more about them:
 
 # %%
-tasks = openml.tasks.list_tasks(output_format="dataframe")
+tasks = openml.tasks.list_tasks()
 
 # %% [markdown]
 # Using ``@`` in
@@ -65,9 +56,6 @@
 
 # %% [markdown]
 # We'll use the test server for the rest of this tutorial.
-#
-# .. warning::
-#    .. include:: ../../test_server_usage_warning.txt
 
 # %%
 openml.config.start_using_configuration_for_example()
@@ -83,7 +71,7 @@
 # the test server:
 
 # %%
-all_tasks = list(openml.tasks.list_tasks(output_format="dataframe")["tid"])
+all_tasks = list(openml.tasks.list_tasks()["tid"])
 task_ids_for_suite = sorted(np.random.choice(all_tasks, replace=False, size=20))
 
 # The study needs a machine-readable and unique alias. To obtain this,
@@ -102,4 +90,3 @@
 
 # %%
 openml.config.stop_using_configuration_for_example()
-# License: BSD 3-Clause
@@ -1,13 +1,5 @@
 # %% [markdown]
-# # Tasks: retrieving splits
-
-# Tasks define a target and a train/test split. Normally, they are the input to the function
-# ``openml.runs.run_model_on_task`` which automatically runs the model on all splits of the task.
-# However, sometimes it is necessary to manually split a dataset to perform experiments outside of
-# the functions provided by OpenML. One such example is in the benchmark library
-# [HPOBench](https://github.com/automl/HPOBench) which extensively uses data from OpenML,
-# but not OpenML's functionality to conduct runs.
-
+# Tasks define a target and a train/test split, which we can use for benchmarking.
 
 # %%
 import openml
@@ -45,12 +37,7 @@
 
 # %%
 print(
-    "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
-        task_id,
-        n_repeats,
-        n_folds,
-        n_samples,
-    )
+    f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
 )
 
 # %% [markdown]
@@ -72,19 +59,14 @@
 # And then split the data based on this:
 
 # %%
-X, y = task.get_X_and_y(dataset_format="dataframe")
+X, y = task.get_X_and_y()
 X_train = X.iloc[train_indices]
 y_train = y.iloc[train_indices]
 X_test = X.iloc[test_indices]
 y_test = y.iloc[test_indices]
 
 print(
-    "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format(
-        X_train.shape,
-        y_train.shape,
-        X_test.shape,
-        y_test.shape,
-    )
+    f"X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}, X_test.shape: {X_test.shape}, y_test.shape: {y_test.shape}"
 )
 
 # %% [markdown]
@@ -96,12 +78,7 @@
 X, y = task.get_X_and_y()
 n_repeats, n_folds, n_samples = task.get_split_dimensions()
 print(
-    "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
-        task_id,
-        n_repeats,
-        n_folds,
-        n_samples,
-    )
+    f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
 )
 
 # %% [markdown]
@@ -122,16 +99,8 @@
             y_test = y.iloc[test_indices]
 
             print(
-                "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
-                "y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
-                    repeat_idx,
-                    fold_idx,
-                    sample_idx,
-                    X_train.shape,
-                    y_train.shape,
-                    X_test.shape,
-                    y_test.shape,
-                )
+                f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
+                f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
             )
 
 # %% [markdown]
@@ -143,12 +112,7 @@
 X, y = task.get_X_and_y()
 n_repeats, n_folds, n_samples = task.get_split_dimensions()
 print(
-    "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
-        task_id,
-        n_repeats,
-        n_folds,
-        n_samples,
-    )
+    f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
 )
 
 # %% [markdown]
@@ -169,16 +133,8 @@
             y_test = y.iloc[test_indices]
 
             print(
-                "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
-                "y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
-                    repeat_idx,
-                    fold_idx,
-                    sample_idx,
-                    X_train.shape,
-                    y_train.shape,
-                    X_test.shape,
-                    y_test.shape,
-                )
+                f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
+                f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
             )
 
 # %% [markdown]
@@ -190,12 +146,7 @@
 X, y = task.get_X_and_y()
 n_repeats, n_folds, n_samples = task.get_split_dimensions()
 print(
-    "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
-        task_id,
-        n_repeats,
-        n_folds,
-        n_samples,
-    )
+    f"Task {task_id}: number of repeats: {n_repeats}, number of folds: {n_folds}, number of samples {n_samples}."
 )
 
 # %% [markdown]
@@ -216,15 +167,6 @@
             y_test = y.iloc[test_indices]
 
             print(
-                "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, "
-                "y_train.shape {}, X_test.shape {}, y_test.shape {}".format(
-                    repeat_idx,
-                    fold_idx,
-                    sample_idx,
-                    X_train.shape,
-                    y_train.shape,
-                    X_test.shape,
-                    y_test.shape,
-                )
+                f"Repeat #{repeat_idx}, fold #{fold_idx}, samples {sample_idx}: X_train.shape: {X_train.shape}, "
+                f"y_train.shape {y_train.shape}, X_test.shape {X_test.shape}, y_test.shape {y_test.shape}"
             )
-# License: BSD 3-Clause