|
39 | 39 | exit() |
40 | 40 |
|
41 | 41 | # DEPRECATED EXAMPLE -- Avoid running this code in our CI/CD pipeline |
42 | | -print("This example is deprecated; remove this print and the exit() below to run it manually.")
43 | | -exit() |
44 | | - |
45 | | -import json |
46 | | -import fanova |
47 | | -import matplotlib.pyplot as plt |
48 | | -import pandas as pd |
49 | | -import seaborn as sns |
50 | | - |
51 | | -import openml |
52 | | - |
53 | | - |
54 | | -############################################################################## |
55 | | -# With the advent of automated machine learning, automated hyperparameter |
56 | | -# optimization methods are by now routinely used in data mining. However, this |
57 | | -# progress is not yet matched by automated analyses that yield information
58 | | -# beyond performance-optimizing hyperparameter settings.
59 | | -# In this example, we aim to answer the following question: given an
60 | | -# algorithm, which of its hyperparameters are generally most important?
61 | | -# |
62 | | -# This work is carried out on the OpenML-100 benchmark suite, which can be |
63 | | -# obtained by ``openml.study.get_suite('OpenML100')``. In this example, we |
64 | | -# conduct the experiment on the Support Vector Machine (``flow_id=7707``) |
65 | | -# with a specific kernel (selected via a post-processing filter on the
66 | | -# results). We also set a few experimental parameters (the number of results
67 | | -# per task, the evaluation measure, and the number of trees of the internal
68 | | -# functional ANOVA) before the fun can begin.
69 | | -# |
70 | | -# Note that we simplify the example in several ways: |
71 | | -# |
72 | | -# 1) We only consider numerical hyperparameters |
73 | | -# 2) We consider all hyperparameters that are numerical (in reality, some
74 | | -#    hyperparameters might be inactive (e.g., ``degree``) or irrelevant
75 | | -#    (e.g., ``random_state``))
76 | | -# 3) We assume all hyperparameters to be on a uniform scale
77 | | -# |
78 | | -# Any difference in conclusion between the actual paper and the presented |
79 | | -# results is most likely due to one of these simplifications. For example, |
80 | | -# the hyperparameter C looks rather insignificant, whereas it is quite |
81 | | -# important when it is put on a log-scale. All these simplifications can be |
82 | | -# addressed by defining a ConfigSpace. For a more elaborate example that uses
83 | | -# this, see:
84 | | -# https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 |
85 | | - |
86 | | -suite = openml.study.get_suite("OpenML100") |
87 | | -flow_id = 7707 |
88 | | -parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} |
89 | | -evaluation_measure = "predictive_accuracy" |
90 | | -limit_per_task = 500 |
91 | | -limit_nr_tasks = 15 |
92 | | -n_trees = 16 |
93 | | - |
94 | | -fanova_results = [] |
95 | | -# We obtain all results from OpenML per task. In practice, the bottleneck is the communication with OpenML,
96 | | -# so for repeated experiments it is better to cache the results in a local file.
97 | | -for idx, task_id in enumerate(suite.tasks): |
98 | | - if limit_nr_tasks is not None and idx >= limit_nr_tasks: |
99 | | - continue |
100 | | - print( |
101 | | - "Starting with task %d (%d/%d)" |
102 | | - % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) |
103 | | - ) |
104 | | -    # note that we only include tasks from the specified benchmark suite (enforced by the for-loop)
105 | | - evals = openml.evaluations.list_evaluations_setups( |
106 | | - evaluation_measure, |
107 | | - flows=[flow_id], |
108 | | - tasks=[task_id], |
109 | | - size=limit_per_task, |
110 | | - output_format="dataframe", |
111 | | - ) |
112 | | - |
113 | | - performance_column = "value" |
114 | | -    # make a DataFrame consisting of all hyperparameters (a dict in setup['parameters']) and the performance
115 | | -    # value (in setup['value']). The following lines look a bit complicated, but do two things: a) combine the
116 | | -    # hyperparameters and performance data in a single dict, b) cast hyperparameter values to the appropriate format.
117 | | - # Note that the ``json.loads(...)`` requires the content to be in JSON format, which is only the case for |
118 | | - # scikit-learn setups (and even there some legacy setups might violate this requirement). It will work for the |
119 | | - # setups that belong to the flows embedded in this example though. |
120 | | - try: |
121 | | - setups_evals = pd.DataFrame( |
122 | | - [ |
123 | | - dict( |
124 | | - **{name: json.loads(value) for name, value in setup["parameters"].items()}, |
125 | | - **{performance_column: setup[performance_column]} |
126 | | - ) |
127 | | - for _, setup in evals.iterrows() |
128 | | - ] |
| 42 | +print("This example is deprecated; remove the `if False:` guard (and dedent the body) to run it manually.")
| 43 | +if False: |
| 44 | + import json |
| 45 | + import fanova |
| 46 | + import matplotlib.pyplot as plt |
| 47 | + import pandas as pd |
| 48 | + import seaborn as sns |
| 49 | + |
| 50 | + import openml |
| 51 | + |
| 52 | + |
| 53 | + ############################################################################## |
| 54 | + # With the advent of automated machine learning, automated hyperparameter |
| 55 | + # optimization methods are by now routinely used in data mining. However, this |
| 56 | +    # progress is not yet matched by automated analyses that yield information
| 57 | +    # beyond performance-optimizing hyperparameter settings.
| 58 | +    # In this example, we aim to answer the following question: given an
| 59 | +    # algorithm, which of its hyperparameters are generally most important?
| 60 | + # |
| 61 | + # This work is carried out on the OpenML-100 benchmark suite, which can be |
| 62 | + # obtained by ``openml.study.get_suite('OpenML100')``. In this example, we |
| 63 | + # conduct the experiment on the Support Vector Machine (``flow_id=7707``) |
| 64 | +    # with a specific kernel (selected via a post-processing filter on the
| 65 | +    # results). We also set a few experimental parameters (the number of results
| 66 | +    # per task, the evaluation measure, and the number of trees of the internal
| 67 | +    # functional ANOVA) before the fun can begin.
| 68 | + # |
| 69 | + # Note that we simplify the example in several ways: |
| 70 | + # |
| 71 | + # 1) We only consider numerical hyperparameters |
| 72 | +    # 2) We consider all hyperparameters that are numerical (in reality, some
| 73 | +    #    hyperparameters might be inactive (e.g., ``degree``) or irrelevant
| 74 | +    #    (e.g., ``random_state``))
| 75 | +    # 3) We assume all hyperparameters to be on a uniform scale
| 76 | + # |
| 77 | + # Any difference in conclusion between the actual paper and the presented |
| 78 | + # results is most likely due to one of these simplifications. For example, |
| 79 | + # the hyperparameter C looks rather insignificant, whereas it is quite |
| 80 | + # important when it is put on a log-scale. All these simplifications can be |
| 81 | +    # addressed by defining a ConfigSpace. For a more elaborate example that uses
| 82 | +    # this, see:
| 83 | + # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 |
| 84 | + |
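To make the ConfigSpace remark above concrete, here is a minimal sketch. It is not part of this commit: the bounds are illustrative, and it assumes the ``ConfigSpace`` package and fanova's ``config_space`` argument.

    from ConfigSpace import ConfigurationSpace
    from ConfigSpace.hyperparameters import (
        CategoricalHyperparameter,
        UniformFloatHyperparameter,
    )

    cs = ConfigurationSpace()
    # put C on a log-scale, addressing simplification 3) above (range is illustrative)
    cs.add_hyperparameter(UniformFloatHyperparameter("C", 2**-5, 2**15, log=True))
    # declare the kernel as categorical instead of dropping it, addressing 1)
    cs.add_hyperparameter(CategoricalHyperparameter("kernel", ["rbf", "poly", "sigmoid"]))
    # the space could then be passed as fanova.fANOVA(X, Y, config_space=cs, n_trees=n_trees)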
| 85 | + suite = openml.study.get_suite("OpenML100") |
| 86 | + flow_id = 7707 |
| 87 | + parameter_filters = {"sklearn.svm.classes.SVC(17)_kernel": "sigmoid"} |
| 88 | + evaluation_measure = "predictive_accuracy" |
| 89 | + limit_per_task = 500 |
| 90 | + limit_nr_tasks = 15 |
| 91 | + n_trees = 16 |
| 92 | + |
| 93 | + fanova_results = [] |
| 94 | +    # We obtain all results from OpenML per task. In practice, the bottleneck is the communication with OpenML,
| 95 | +    # so for repeated experiments it is better to cache the results in a local file (a sketch follows below).
| 96 | + for idx, task_id in enumerate(suite.tasks): |
| 97 | + if limit_nr_tasks is not None and idx >= limit_nr_tasks: |
| 98 | + continue |
| 99 | + print( |
| 100 | + "Starting with task %d (%d/%d)" |
| 101 | + % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) |
129 | 102 | ) |
130 | | - except json.decoder.JSONDecodeError as e: |
131 | | - print("Task %d error: %s" % (task_id, e)) |
132 | | - continue |
133 | | - # apply our filters, to have only the setups that comply to the hyperparameters we want |
134 | | - for filter_key, filter_value in parameter_filters.items(): |
135 | | - setups_evals = setups_evals[setups_evals[filter_key] == filter_value] |
136 | | -    # in this simplified example, we only consider integer- and float-valued hyperparameters. For categorical
137 | | -    # hyperparameters, the fanova library needs to be informed via a ConfigSpace object.
138 | | - setups_evals = setups_evals.select_dtypes(include=["int64", "float64"]) |
139 | | -    # drop columns with a single unique value; a hyperparameter that never varies is by definition not
140 | | -    # interesting, e.g., ``axis``, ``verbose``.
141 | | - setups_evals = setups_evals[ |
142 | | - [ |
143 | | - c |
144 | | - for c in list(setups_evals) |
145 | | - if len(setups_evals[c].unique()) > 1 or c == performance_column |
146 | | - ] |
147 | | - ] |
148 | | -    # We are done with processing ``setups_evals``. Note that we might still have some irrelevant hyperparameters,
149 | | -    # e.g., ``random_state``, and we have dropped some relevant ones, namely the categorical hyperparameters. Let's check it out:
150 | | - |
151 | | - # determine x values to pass to fanova library |
152 | | - parameter_names = [ |
153 | | - pname for pname in setups_evals.columns.to_numpy() if pname != performance_column |
154 | | - ] |
155 | | - evaluator = fanova.fanova.fANOVA( |
156 | | - X=setups_evals[parameter_names].to_numpy(), |
157 | | - Y=setups_evals[performance_column].to_numpy(), |
158 | | - n_trees=n_trees, |
159 | | - ) |
160 | | - for idx, pname in enumerate(parameter_names): |
| 103 | +        # note that we only include tasks from the specified benchmark suite (enforced by the for-loop)
| 104 | + evals = openml.evaluations.list_evaluations_setups( |
| 105 | + evaluation_measure, |
| 106 | + flows=[flow_id], |
| 107 | + tasks=[task_id], |
| 108 | + size=limit_per_task, |
| 109 | + output_format="dataframe", |
| 110 | + ) |
| 111 | + |
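As suggested by the caching comment above, each task's evaluations can be persisted locally so repeated runs skip the OpenML calls. A minimal sketch, where the ``cache`` directory name is a hypothetical choice and ``to_pickle``/``read_pickle`` are standard pandas:

    import os

    os.makedirs("cache", exist_ok=True)
    cache_file = os.path.join("cache", "evals_task_%d.pkl" % task_id)
    if os.path.exists(cache_file):
        # reuse the locally cached evaluations for this task
        evals = pd.read_pickle(cache_file)
    else:
        # fetch from OpenML once, then persist for later runs
        evals = openml.evaluations.list_evaluations_setups(
            evaluation_measure,
            flows=[flow_id],
            tasks=[task_id],
            size=limit_per_task,
            output_format="dataframe",
        )
        evals.to_pickle(cache_file)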
| 112 | + performance_column = "value" |
| 113 | +        # make a DataFrame consisting of all hyperparameters (a dict in setup['parameters']) and the performance
| 114 | +        # value (in setup['value']). The following lines look a bit complicated, but do two things: a) combine the
| 115 | +        # hyperparameters and performance data in a single dict, b) cast hyperparameter values to the appropriate format.
| 116 | + # Note that the ``json.loads(...)`` requires the content to be in JSON format, which is only the case for |
| 117 | + # scikit-learn setups (and even there some legacy setups might violate this requirement). It will work for the |
| 118 | + # setups that belong to the flows embedded in this example though. |
161 | 119 | try: |
162 | | - fanova_results.append( |
163 | | - { |
164 | | - "hyperparameter": pname.split(".")[-1], |
165 | | - "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], |
166 | | - } |
| 120 | + setups_evals = pd.DataFrame( |
| 121 | + [ |
| 122 | + dict( |
| 123 | + **{name: json.loads(value) for name, value in setup["parameters"].items()}, |
| 124 | + **{performance_column: setup[performance_column]} |
| 125 | + ) |
| 126 | + for _, setup in evals.iterrows() |
| 127 | + ] |
167 | 128 | ) |
168 | | - except RuntimeError as e: |
169 | | - # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant |
170 | | - # for all configurations (there is no variance). We will skip these tasks (like the authors did in the |
171 | | - # paper). |
| 129 | + except json.decoder.JSONDecodeError as e: |
172 | 130 | print("Task %d error: %s" % (task_id, e)) |
173 | 131 | continue |
| 132 | + # apply our filters, to have only the setups that comply to the hyperparameters we want |
| 133 | + for filter_key, filter_value in parameter_filters.items(): |
| 134 | + setups_evals = setups_evals[setups_evals[filter_key] == filter_value] |
| 135 | +        # in this simplified example, we only consider integer- and float-valued hyperparameters. For categorical
| 136 | +        # hyperparameters, the fanova library needs to be informed via a ConfigSpace object (see the sketch above).
| 137 | + setups_evals = setups_evals.select_dtypes(include=["int64", "float64"]) |
| 138 | +        # drop columns with a single unique value; a hyperparameter that never varies is by definition not
| 139 | +        # interesting, e.g., ``axis``, ``verbose``.
| 140 | + setups_evals = setups_evals[ |
| 141 | + [ |
| 142 | + c |
| 143 | + for c in list(setups_evals) |
| 144 | + if len(setups_evals[c].unique()) > 1 or c == performance_column |
| 145 | + ] |
| 146 | + ] |
| 147 | +        # We are done with processing ``setups_evals``. Note that we might still have some irrelevant hyperparameters,
| 148 | +        # e.g., ``random_state``, and we have dropped some relevant ones, namely the categorical hyperparameters. Let's check it out:
174 | 149 |
|
175 | | -# transform ``fanova_results`` from a list of dicts into a DataFrame |
176 | | -fanova_results = pd.DataFrame(fanova_results) |
177 | | - |
178 | | -############################################################################## |
179 | | -# make the boxplot of the variance contribution. This data could also be used
180 | | -# to make a Nemenyi plot, but that relies on the rather heavy ``Orange``
181 | | -# dependency (``pip install Orange3``). For the complete example, see the
182 | | -# more elaborate script linked earlier.
183 | | -fig, ax = plt.subplots() |
184 | | -sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) |
185 | | -ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") |
186 | | -ax.set_ylabel("Variance Contribution") |
187 | | -ax.set_xlabel(None) |
188 | | -plt.tight_layout() |
189 | | -plt.show() |
| 150 | + # determine x values to pass to fanova library |
| 151 | + parameter_names = [ |
| 152 | + pname for pname in setups_evals.columns.to_numpy() if pname != performance_column |
| 153 | + ] |
| 154 | + evaluator = fanova.fanova.fANOVA( |
| 155 | + X=setups_evals[parameter_names].to_numpy(), |
| 156 | + Y=setups_evals[performance_column].to_numpy(), |
| 157 | + n_trees=n_trees, |
| 158 | + ) |
| 159 | + for idx, pname in enumerate(parameter_names): |
| 160 | + try: |
| 161 | + fanova_results.append( |
| 162 | + { |
| 163 | + "hyperparameter": pname.split(".")[-1], |
| 164 | + "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], |
| 165 | + } |
| 166 | + ) |
| 167 | + except RuntimeError as e: |
| 168 | + # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant |
| 169 | + # for all configurations (there is no variance). We will skip these tasks (like the authors did in the |
| 170 | + # paper). |
| 171 | + print("Task %d error: %s" % (task_id, e)) |
| 172 | + continue |
| 173 | + |
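Beyond individual importances, fanova can also quantify interaction effects between hyperparameter pairs. A sketch using the evaluator from the loop above; it assumes the result dict exposes a ``total importance`` field for the pair key, mirroring the ``individual importance`` access used above:

    # sketch: importance of the interaction between the first two hyperparameters
    if len(parameter_names) >= 2:
        interaction = evaluator.quantify_importance([0, 1])[(0, 1)]
        print("interaction importance:", interaction["total importance"])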
| 174 | + # transform ``fanova_results`` from a list of dicts into a DataFrame |
| 175 | + fanova_results = pd.DataFrame(fanova_results) |
| 176 | + |
| 177 | + ############################################################################## |
| 178 | +    # make the boxplot of the variance contribution. This data could also be used
| 179 | +    # to make a Nemenyi plot, but that relies on the rather heavy ``Orange``
| 180 | +    # dependency (``pip install Orange3``). For the complete example, see the
| 181 | +    # more elaborate script linked earlier.
| 182 | + fig, ax = plt.subplots() |
| 183 | + sns.boxplot(x="hyperparameter", y="fanova", data=fanova_results, ax=ax) |
| 184 | + ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right") |
| 185 | + ax.set_ylabel("Variance Contribution") |
| 186 | + ax.set_xlabel(None) |
| 187 | + plt.tight_layout() |
| 188 | + plt.show() |
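A compact numeric companion to the boxplot, using only standard pandas on the ``fanova_results`` DataFrame built above:

    # median variance contribution per hyperparameter, most important first
    print(
        fanova_results.groupby("hyperparameter")["fanova"]
        .median()
        .sort_values(ascending=False)
    )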