-
-
Notifications
You must be signed in to change notification settings - Fork 271
[ENH] V1 -> V2 Migration - Flows (module) #1609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fa3cd40
7e9bc1f
c603383
ff6a8b0
f01898f
5c4511e
29fac2c
bdf53f3
9d0098f
a1a706c
43276d2
1206f69
bde5942
4948e99
a354167
c4e713f
95fc75f
1fe7e3e
54a3151
2b6fe65
685c19a
920ff21
fa53f8d
2b2db96
c7c24a3
c9617f9
92d88e4
443ade9
468087d
5bc37b8
08d9916
9660e78
f60c78b
f25c95b
8caba11
6e577c4
1913c10
7681949
01840a5
26ed4c1
c588d0c
b6ff720
80d5afc
f47112c
d44cf3e
ea7dda1
f0e5947
edcd006
cde0aae
edc5772
aa1e560
06b8497
384da91
1878138
dc26e01
e2d059b
d156ad4
d7a3788
b5b9ef6
68820fe
567eca4
a82a636
d39d9d0
724c4ae
0251f49
e213873
b6f38cd
e06c538
fccb772
3d61b27
b2287c3
b7e285e
fd43c48
f4aab6b
d43cf86
3e323ed
9195fa6
5342eec
adc0e74
bfb2d3e
cabaecf
d1da932
671f077
85c1113
39bf86a
7b66677
d2224c4
9608c36
f6bc7f7
baa3a38
c9e4b73
acfa2bb
63fa0a0
aa9d486
8802b8a
52b93fe
ec9477f
0c480da
c7d9fe5
935b64e
b273193
911f44d
10d134a
935f0f4
9514df8
53bee94
0e05a16
7413191
33b4ca0
a6b9a45
f924b32
541b0f2
acb173f
3e8d1f0
f83bdb5
2a42712
001caad
fb38a2d
03c4ca9
8d708fd
4f75bba
164f66f
c4dae43
36c20a2
ab3c1eb
ff0220e
f8144e2
2a488ca
599c7e1
2867862
f09f3cd
5f731ce
bad7842
aefdb38
0f40b02
7ac1672
6ac1dfe
62924c9
27696bb
190face
95daaa6
7841ea8
cc515aa
e6a92df
1b8c22a
fc839a6
1c922af
ffa9ce9
a7b2d21
27fe790
8965112
72ea1a4
a696c49
755636d
d07af34
2d9c8ec
002b989
045d896
c437966
e27470a
d04d956
9263f7f
79dea29
f6497c2
dce7f54
40dd460
0fc917c
3d86b18
aba3d3e
d99d54d
dc22e3a
7318573
29ef187
cf94c89
298fbda
9870502
33065c2
76b92bb
419edcb
cb6d937
8544c8a
d4c413b
fab1a15
4e75b92
49c6d59
af9f5e6
276324a
73f7594
2ee7fa3
1954640
4be5bbd
9027c01
e5461a9
7d899a9
8587414
23a3450
ac28f82
097e27c
c28c808
2d698ed
6559219
9f3ca32
ffe596d
4a66245
c762fb4
77c21f2
eac24fc
2ed65fe
f3b07de
29db3f1
3b4e538
8ac886b
305f4f0
b2bf164
e97e6c2
c66d73c
aa54e8e
2d452d3
c235812
39eb823
50eed37
7a000eb
79f6187
56de184
b1a9e7f
d716ecf
3c29e71
b4ff0b2
93155ee
d3cc9a7
a6b82f4
3419973
8de99b7
45c85d9
7d61107
1ecbbba
65472ed
44b48b5
04bc83b
f926092
309f136
d274cb5
2896b41
df67df1
2a7e9ce
d0dc185
3eabc19
299cbda
a942d52
b89a084
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,11 +1,302 @@ | ||||||||||||||||||||
| from __future__ import annotations | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from collections.abc import Mapping | ||||||||||||||||||||
| from typing import Any | ||||||||||||||||||||
|
|
||||||||||||||||||||
| import pandas as pd | ||||||||||||||||||||
| import xmltodict | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from openml.exceptions import OpenMLServerError, OpenMLServerException | ||||||||||||||||||||
| from openml.flows.flow import OpenMLFlow | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from .base import FlowAPI, ResourceV1API, ResourceV2API | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| class FlowV1API(ResourceV1API, FlowAPI): | ||||||||||||||||||||
| """Version 1 API implementation for flow resources.""" | ||||||||||||||||||||
| def get( | ||||||||||||||||||||
| self, | ||||||||||||||||||||
| flow_id: int, | ||||||||||||||||||||
| *, | ||||||||||||||||||||
| reset_cache: bool = False, | ||||||||||||||||||||
| ) -> OpenMLFlow: | ||||||||||||||||||||
| """Get a flow from the OpenML server. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Parameters | ||||||||||||||||||||
| ---------- | ||||||||||||||||||||
| flow_id : int | ||||||||||||||||||||
| The ID of the flow to retrieve. | ||||||||||||||||||||
| reset_cache : bool, optional (default=False) | ||||||||||||||||||||
| Whether to reset the cache for this request. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Returns | ||||||||||||||||||||
| ------- | ||||||||||||||||||||
| OpenMLFlow | ||||||||||||||||||||
| The retrieved flow object. | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| response = self._http.get( | ||||||||||||||||||||
| f"flow/{flow_id}", | ||||||||||||||||||||
| enable_cache=True, | ||||||||||||||||||||
| refresh_cache=reset_cache, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| flow_xml = response.text | ||||||||||||||||||||
| return OpenMLFlow._from_dict(xmltodict.parse(flow_xml)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| def exists(self, name: str, external_version: str) -> int | bool: | ||||||||||||||||||||
| """Check if a flow exists on the OpenML server. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Parameters | ||||||||||||||||||||
| ---------- | ||||||||||||||||||||
| name : str | ||||||||||||||||||||
| The name of the flow. | ||||||||||||||||||||
| external_version : str | ||||||||||||||||||||
| The external version of the flow. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Returns | ||||||||||||||||||||
| ------- | ||||||||||||||||||||
| int | bool | ||||||||||||||||||||
| The flow ID if the flow exists, False otherwise. | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| if not (isinstance(name, str) and len(name) > 0): | ||||||||||||||||||||
| raise ValueError("Argument 'name' should be a non-empty string") | ||||||||||||||||||||
| if not (isinstance(external_version, str) and len(external_version) > 0): | ||||||||||||||||||||
| raise ValueError("Argument 'version' should be a non-empty string") | ||||||||||||||||||||
|
|
||||||||||||||||||||
| data = {"name": name, "external_version": external_version, "api_key": self._http.api_key} | ||||||||||||||||||||
| xml_response = self._http.post("flow/exists", data=data).text | ||||||||||||||||||||
|
Comment on lines
+64
to
+65
|
||||||||||||||||||||
| data = {"name": name, "external_version": external_version, "api_key": self._http.api_key} | |
| xml_response = self._http.post("flow/exists", data=data).text | |
| data = {"name": name, "external_version": external_version} | |
| if self._http.api_key: | |
| data["api_key"] = self._http.api_key | |
| xml_response = self._http.post("flow/exists", data=data, use_api_key=False).text |
Copilot
AI
Feb 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The path parameter in the publish method is marked as optional (str | None), but on line 159 it's immediately overwritten with the hardcoded value "flow". This makes the parameter pointless. Either remove the path parameter from the method signature entirely, or remove the line that overwrites it if the parameter should actually be used.
Copilot
AI
Mar 23, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FlowV2API.exists() interpolates name and external_version directly into the URL path. These values can contain characters that require URL escaping, which can break requests or cause ambiguous routing. Quote/escape each path segment (e.g., urllib.parse.quote) or send them as query parameters if supported by the v2 endpoint.
Copilot
AI
Mar 24, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FlowV2API.exists() currently catches OpenMLServerError and returns False, which will also hide real failures (e.g., authentication/authorization problems, 500s) by reporting "flow does not exist". Only treat the specific "not found" case as non-existence (e.g., catch OpenMLServerNoResult / an OpenMLServerException with the v2 not-found code or 404) and re-raise all other exceptions.
| except (OpenMLServerError, KeyError): | |
| # v2 returns 404 when flow doesn't exist, which raises OpenMLServerError | |
| return False | |
| except OpenMLServerError as err: | |
| # v2 returns HTTP 404 when the flow doesn't exist; only treat that case | |
| # as "not found" and propagate all other server errors. | |
| if getattr(err, "code", None) == 404: | |
| return False | |
| raise |
Copilot
AI
Feb 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similar to the publish method, the list method in FlowV2API should have -> NoReturn as its return type instead of -> pd.DataFrame since it only calls self._not_supported() which never returns.
Copilot
AI
Feb 27, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The publish method signature in FlowV2API has a # type: ignore[override] comment, but this is because the return type doesn't match the parent class ResourceV2API.publish. However, the parent ResourceV2API.publish raises OpenMLNotSupportedError, so it never actually returns. The return type annotation -> int is misleading since the method always raises an exception. Consider using -> NoReturn as the return type or removing the return type annotation entirely since the method only raises.
Omswastik-11 marked this conversation as resolved.
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
|
|
||
| import xmltodict | ||
|
|
||
| import openml | ||
|
||
| from openml.base import OpenMLBase | ||
| from openml.extensions import Extension, get_extension_by_flow | ||
| from openml.utils import extract_xml_tags | ||
|
|
@@ -438,9 +439,14 @@ def publish(self, raise_error_if_exists: bool = False) -> OpenMLFlow: # noqa: F | |
| raise openml.exceptions.PyOpenMLError( | ||
| "Flow does not exist on the server, but 'flow.flow_id' is not None.", | ||
| ) | ||
| super().publish() | ||
| assert self.flow_id is not None # for mypy | ||
| flow_id = self.flow_id | ||
|
|
||
| file_elements = self._get_file_elements() | ||
| if "description" not in file_elements: | ||
| file_elements["description"] = self._to_xml() | ||
|
|
||
| # Use openml._backend.flow.publish which internally calls ResourceV1.publish | ||
| flow_id = openml._backend.flow.publish(path="flow", files=file_elements) | ||
| self.flow_id = flow_id | ||
| elif raise_error_if_exists: | ||
| error_message = f"This OpenMLFlow already exists with id: {flow_id}." | ||
| raise openml.exceptions.PyOpenMLError(error_message) | ||
|
|
@@ -468,6 +474,30 @@ def publish(self, raise_error_if_exists: bool = False) -> OpenMLFlow: # noqa: F | |
| ) from e | ||
| return self | ||
|
|
||
| def push_tag(self, tag: str) -> None: | ||
| """Annotates this flow with a tag on the server. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| tag : str | ||
| Tag to attach to the flow. | ||
| """ | ||
| if self.flow_id is None: | ||
| raise ValueError("Flow does not have an ID. Please publish the flow before tagging.") | ||
| openml._backend.flow.tag(self.flow_id, tag) | ||
|
|
||
|
Comment on lines
+485
to
+488
|
||
| def remove_tag(self, tag: str) -> None: | ||
| """Removes a tag from this flow on the server. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| tag : str | ||
| Tag to remove from the flow. | ||
| """ | ||
| if self.flow_id is None: | ||
| raise ValueError("Flow does not have an ID. Please publish the flow before untagging.") | ||
| openml._backend.flow.untag(self.flow_id, tag) | ||
|
Comment on lines
+477
to
+499
|
||
|
|
||
| def get_structure(self, key_item: str) -> dict[str, list[str]]: | ||
| """ | ||
| Returns for each sub-component of the flow the path of identifiers | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FlowV1API.get()does not detect v1-style error payloads (<oml:error>...) that are returned with HTTP 200.HTTPClientonly validates by status code, so this method can end up passing an error dict intoOpenMLFlow._from_dict()and failing with a confusing parsing error. Please add an<oml:error>check similar toexists()/list()and raiseOpenMLServerExceptionwith the server-provided code/message.