Skip to content

Commit 5b85b77

Browse files
[DOC] Enhance Docstrings of Flows Core Public Functions (#1569)
#### Metadata * Reference Issue: #1538 #### Details Enhance the docstrings of the flows core public functions: add examples, parameter defaults, parameter types, etc.
1 parent aa04b30 commit 5b85b77

1 file changed

Lines changed: 172 additions & 58 deletions

File tree

openml/flows/functions.py

Lines changed: 172 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,59 @@ def _get_cached_flow(fid: int) -> OpenMLFlow:
7171

7272
@openml.utils.thread_safe_if_oslo_installed
7373
def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow: # noqa: FBT002
74-
"""Download the OpenML flow for a given flow ID.
74+
"""Fetch an OpenMLFlow by its server-assigned ID.
75+
76+
Queries the OpenML REST API for the flow metadata and returns an
77+
:class:`OpenMLFlow` instance. If the flow is already cached locally,
78+
the cached copy is returned. Optionally the flow can be re-instantiated
79+
into a concrete model instance using the registered extension.
7580
7681
Parameters
7782
----------
7883
flow_id : int
7984
The OpenML flow id.
80-
81-
reinstantiate: bool
82-
Whether to reinstantiate the flow to a model instance.
83-
84-
strict_version : bool, default=True
85-
Whether to fail if version requirements are not fulfilled.
85+
reinstantiate : bool, optional (default=False)
86+
If True, convert the flow description into a concrete model instance
87+
using the flow's extension (e.g., sklearn). If conversion fails and
88+
``strict_version`` is True, an exception will be raised.
89+
strict_version : bool, optional (default=True)
90+
When ``reinstantiate`` is True, whether to enforce exact version
91+
requirements for the extension/model. If False, a new flow may
92+
be returned when versions differ.
8693
8794
Returns
8895
-------
89-
flow : OpenMLFlow
90-
the flow
96+
OpenMLFlow
97+
The flow object with metadata; ``model`` may be populated when
98+
``reinstantiate=True``.
99+
100+
Raises
101+
------
102+
OpenMLCacheException
103+
When cached flow files are corrupted or cannot be read.
104+
OpenMLServerException
105+
When the REST API call fails.
106+
107+
Side Effects
108+
------------
109+
- Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml``
110+
when the flow is downloaded from the server.
111+
112+
Preconditions
113+
-------------
114+
- Network access to the OpenML server is required unless the flow is cached.
115+
- For private flows, ``openml.config.apikey`` must be set.
116+
117+
Notes
118+
-----
119+
Results are cached to speed up subsequent calls. When ``reinstantiate`` is
120+
True and version mismatches occur, a new flow may be returned to reflect
121+
the converted model (only when ``strict_version`` is False).
122+
123+
Examples
124+
--------
125+
>>> import openml
126+
>>> flow = openml.flows.get_flow(5) # doctest: +SKIP
91127
"""
92128
flow_id = int(flow_id)
93129
flow = _get_flow_description(flow_id)
@@ -138,32 +174,47 @@ def list_flows(
138174
tag: str | None = None,
139175
uploader: str | None = None,
140176
) -> pd.DataFrame:
141-
"""
142-
Return a list of all flows which are on OpenML.
143-
(Supports large amount of results)
177+
"""List flows available on the OpenML server.
178+
179+
This function supports paging and filtering and returns a pandas
180+
DataFrame with one row per flow and columns for id, name, version,
181+
external_version, full_name and uploader.
144182
145183
Parameters
146184
----------
147185
offset : int, optional
148-
the number of flows to skip, starting from the first
186+
Number of flows to skip, starting from the first (for paging).
149187
size : int, optional
150-
the maximum number of flows to return
188+
Maximum number of flows to return.
151189
tag : str, optional
152-
the tag to include
153-
kwargs: dict, optional
154-
Legal filter operators: uploader.
190+
Only return flows having this tag.
191+
uploader : str, optional
192+
Only return flows uploaded by this user.
155193
156194
Returns
157195
-------
158-
flows : dataframe
159-
Each row maps to a dataset
160-
Each column contains the following information:
161-
- flow id
162-
- full name
163-
- name
164-
- version
165-
- external version
166-
- uploader
196+
pandas.DataFrame
197+
Rows correspond to flows. Columns include ``id``, ``full_name``,
198+
``name``, ``version``, ``external_version``, and ``uploader``.
199+
200+
Raises
201+
------
202+
OpenMLServerException
203+
When the API call fails.
204+
205+
Side Effects
206+
------------
207+
- None: results are fetched and returned; this is a read-only operation.
208+
209+
Preconditions
210+
-------------
211+
- Network access is required to list flows unless caching mechanisms are
212+
used by the underlying API helper.
213+
214+
Examples
215+
--------
216+
>>> import openml
217+
>>> flows = openml.flows.list_flows(size=100) # doctest: +SKIP
167218
"""
168219
listing_call = partial(_list_flows, tag=tag, uploader=uploader)
169220
batches = openml.utils._list_all(listing_call, offset=offset, limit=size)
@@ -206,25 +257,35 @@ def _list_flows(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
206257

207258

208259
def flow_exists(name: str, external_version: str) -> int | bool:
209-
"""Retrieves the flow id.
260+
"""Check whether a flow (name + external_version) exists on the server.
210261
211-
A flow is uniquely identified by name + external_version.
262+
The OpenML server defines uniqueness of flows by the pair
263+
``(name, external_version)``. This helper queries the server and
264+
returns the corresponding flow id when present.
212265
213266
Parameters
214267
----------
215-
name : string
216-
Name of the flow
217-
external_version : string
268+
name : str
269+
Flow name (e.g., ``sklearn.tree._classes.DecisionTreeClassifier(1)``).
270+
external_version : str
218271
Version information associated with flow.
219272
220273
Returns
221274
-------
222-
flow_exist : int or bool
223-
flow id iff exists, False otherwise
224-
225-
Notes
226-
-----
227-
see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
275+
int or bool
276+
The flow id if the flow exists on the server, otherwise ``False``.
277+
278+
Raises
279+
------
280+
ValueError
281+
If ``name`` or ``external_version`` is empty or not a string.
282+
OpenMLServerException
283+
When the API request fails.
284+
285+
Examples
286+
--------
287+
>>> import openml
288+
>>> openml.flows.flow_exists("weka.JRip", "Weka_3.9.0_10153") # doctest: +SKIP
228289
"""
229290
if not (isinstance(name, str) and len(name) > 0):
230291
raise ValueError("Argument 'name' should be a non-empty string")
@@ -247,35 +308,58 @@ def get_flow_id(
247308
name: str | None = None,
248309
exact_version: bool = True, # noqa: FBT002
249310
) -> int | bool | list[int]:
250-
"""Retrieves the flow id for a model or a flow name.
311+
"""Retrieve flow id(s) for a model instance or a flow name.
251312
252-
Provide either a model or a name to this function. Depending on the input, it does
313+
Provide either a concrete ``model`` (which will be converted to a flow by
314+
the appropriate extension) or a flow ``name``. Behavior depends on
315+
``exact_version``:
253316
254-
* ``model`` and ``exact_version == True``: This helper function first queries for the necessary
255-
extension. Second, it uses that extension to convert the model into a flow. Third, it
256-
executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
257-
server.
258-
* ``model`` and ``exact_version == False``: This helper function first queries for the
259-
necessary extension. Second, it uses that extension to convert the model into a flow. Third
260-
it calls ``list_flows`` and filters the returned values based on the flow name.
261-
* ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
262-
values based on the flow name.
317+
- ``model`` + ``exact_version=True``: convert ``model`` to a flow and call
318+
:func:`flow_exists` to get a single flow id (or False).
319+
- ``model`` + ``exact_version=False``: convert ``model`` to a flow and
320+
return all server flow ids with the same flow name.
321+
- ``name``: ignore ``exact_version`` and return all server flow ids that
322+
match ``name``.
263323
264324
Parameters
265325
----------
266-
model : object
267-
Any model. Must provide either ``model`` or ``name``.
268-
name : str
269-
Name of the flow. Must provide either ``model`` or ``name``.
270-
exact_version : bool
271-
Whether to return the flow id of the exact version or all flow ids where the name
272-
of the flow matches. This is only taken into account for a model where a version number
273-
is available (requires ``model`` to be set).
326+
model : object, optional
327+
A model instance that can be handled by a registered extension. Either
328+
``model`` or ``name`` must be provided.
329+
name : str, optional
330+
Flow name to query for. Either ``model`` or ``name`` must be provided.
331+
exact_version : bool, optional (default=True)
332+
When True and ``model`` is provided, only return the id for the exact
333+
external version. When False, return a list of matching ids.
274334
275335
Returns
276336
-------
277-
int or bool, List
278-
flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
337+
int or bool or list[int]
338+
With ``model`` and ``exact_version=True``: the flow id if found, else ``False``.
338+
Otherwise (including lookup by ``name``): a list of matching flow ids (may be empty).
340+
341+
Raises
342+
------
343+
ValueError
344+
If neither ``model`` nor ``name`` is provided, or if both are provided.
345+
OpenMLServerException
346+
If underlying API calls fail.
347+
348+
Side Effects
349+
------------
350+
- May call server APIs (``flow/exists``, ``flow/list``) and therefore
351+
depends on network access and API keys for private flows.
352+
353+
Examples
354+
--------
355+
>>> import openml
356+
>>> # Lookup by flow name
357+
>>> openml.flows.get_flow_id(name="weka.JRip") # doctest: +SKIP
358+
>>> # Lookup by model instance (requires a registered extension)
359+
>>> import sklearn.tree
360+
>>> import openml_sklearn
361+
>>> clf = sklearn.tree.DecisionTreeClassifier()
362+
>>> openml.flows.get_flow_id(model=clf) # doctest: +SKIP
279363
"""
280364
if model is not None and name is not None:
281365
raise ValueError("Must provide either argument `model` or argument `name`, but not both.")
@@ -391,6 +475,21 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915
391475
392476
check_description : bool
393477
Whether to ignore matching of flow descriptions.
478+
479+
Raises
480+
------
481+
TypeError
482+
When either argument is not an :class:`OpenMLFlow`.
483+
ValueError
484+
When a relevant mismatch is found between the two flows.
485+
486+
Examples
487+
--------
488+
>>> import openml
489+
>>> f1 = openml.flows.get_flow(5) # doctest: +SKIP
490+
>>> f2 = openml.flows.get_flow(5) # doctest: +SKIP
491+
>>> openml.flows.assert_flows_equal(f1, f2) # doctest: +SKIP
492+
>>> # If flows differ, a ValueError is raised
394493
"""
395494
if not isinstance(flow1, OpenMLFlow):
396495
raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}")
@@ -550,5 +649,20 @@ def delete_flow(flow_id: int) -> bool:
550649
-------
551650
bool
552651
True if the deletion was successful. False otherwise.
652+
653+
Raises
654+
------
655+
OpenMLServerException
656+
If the server-side deletion fails due to permissions or other errors.
657+
658+
Side Effects
659+
------------
660+
- Removes the flow from the OpenML server (if permitted).
661+
662+
Examples
663+
--------
664+
>>> import openml
665+
>>> # Deletes flow 23 if you are the uploader and it's not linked to runs
666+
>>> openml.flows.delete_flow(23) # doctest: +SKIP
553667
"""
554668
return openml.utils._delete_entity("flow", flow_id)

0 commit comments

Comments
 (0)