Skip to content

Commit 1e85bb6

Browse files
ArlindKadra and mfeurer
authored and committed
[WIP] An example that loads and visualizes the iris dataset (#808)
* An example that loads and visualizes the iris dataset
* Changing the simple_datasets_tutorial and deleting new dataset
1 parent 3e23a3b commit 1e85bb6

1 file changed

Lines changed: 43 additions & 4 deletions

File tree

examples/20_basic/simple_datasets_tutorial.py

Lines changed: 43 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,15 @@
33
Datasets
44
========
55
6-
A basic tutorial on how to list and download datasets.
6+
A basic tutorial on how to list, load and visualize datasets.
77
"""
88
############################################################################
9-
import openml
9+
# In general, we recommend working with tasks, so that the results can
10+
# be easily reproduced. Furthermore, the results can be compared to existing results
11+
# at OpenML. However, for the purposes of this tutorial, we are going to work with
12+
# the datasets directly.
1013

14+
import openml
1115
############################################################################
1216
# List datasets
1317
# =============
@@ -19,11 +23,46 @@
1923
# Download a dataset
2024
# ==================
2125

22-
first_dataset_id = int(datasets_df['did'].iloc[0])
23-
dataset = openml.datasets.get_dataset(first_dataset_id)
26+
# Iris dataset https://www.openml.org/d/61
27+
dataset = openml.datasets.get_dataset(61)
2428

2529
# Print a summary
2630
print(f"This is dataset '{dataset.name}', the target feature is "
2731
f"'{dataset.default_target_attribute}'")
2832
print(f"URL: {dataset.url}")
2933
print(dataset.description[:500])
34+
35+
############################################################################
36+
# Load a dataset
37+
# ==============
38+
39+
# X - An array/dataframe where each row represents one example with
40+
# the corresponding feature values.
41+
# y - the classes for each example
42+
# categorical_indicator - an array that indicates which feature is categorical
43+
# attribute_names - the names of the features for the examples (X) and
44+
# target feature (y)
45+
X, y, categorical_indicator, attribute_names = dataset.get_data(
46+
dataset_format='dataframe',
47+
target=dataset.default_target_attribute
48+
)
49+
############################################################################
50+
# Visualize the dataset
51+
# =====================
52+
53+
import pandas as pd
54+
import seaborn as sns
55+
import matplotlib.pyplot as plt
56+
sns.set_style("darkgrid")
57+
58+
59+
def hide_current_axis():
60+
plt.gca().set_visible(False)
61+
62+
63+
# We combine all the data so that we can map the different
64+
# examples to different colors according to the classes.
65+
combined_data = pd.concat([X, y], axis=1)
66+
iris_plot = sns.pairplot(combined_data, hue="class")
67+
iris_plot.map_upper(hide_current_axis)
68+
plt.show()

0 commit comments

Comments
 (0)