Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
305 commits
Select commit Hold shift + click to select a range
05dec9c
fixed minio methods and edit migration
JATAYU000 Jan 21, 2026
2012a72
Merge base pr
JATAYU000 Jan 21, 2026
35f450a
Updated Minio, download methods
JATAYU000 Jan 21, 2026
9d0098f
Merge branch 'main' into issue1564
fkiraly Jan 21, 2026
5080752
checksum for arff download
JATAYU000 Jan 22, 2026
24582ef
use cache, new enpoints
JATAYU000 Jan 22, 2026
d789e83
lazy imports
JATAYU000 Jan 22, 2026
43276d2
fix import in resources/base.py
geetu040 Jan 23, 2026
1206f69
refactor and add exception handling
geetu040 Jan 26, 2026
bde5942
Merge branch 'main' into issue1564
satvshr Jan 26, 2026
4948e99
refactor resources/base/
geetu040 Jan 26, 2026
a354167
implement delete
geetu040 Jan 26, 2026
1fe7e3e
implement publish and minor refactoring
geetu040 Jan 27, 2026
8d51fea
Fixes some tests
JATAYU000 Jan 27, 2026
bd85ec4
fix conflicts
JATAYU000 Jan 27, 2026
54a3151
implement tag/untag
geetu040 Jan 27, 2026
2b6fe65
implement fallback
geetu040 Jan 27, 2026
f4d6e75
code 103, dataset download
JATAYU000 Jan 27, 2026
0c16547
New tests partial
JATAYU000 Jan 27, 2026
fa53f8d
add test_http.py
geetu040 Jan 28, 2026
2b2db96
add uses_test_server marker
geetu040 Jan 28, 2026
f4c739d
Update delete test
JATAYU000 Jan 28, 2026
b87feb5
Merge commit 'refs/pull/1576/head' into dataset_resource
JATAYU000 Jan 28, 2026
c9617f9
implement reset_cache
geetu040 Jan 29, 2026
6bb20c3
Post update, v2 tests additions
JATAYU000 Jan 29, 2026
7d7fa43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 29, 2026
5bc37b8
fixes with publish/delete
geetu040 Jan 29, 2026
08d9916
fix cache_key in tests
geetu040 Jan 29, 2026
8caba11
update _not_supported
geetu040 Jan 30, 2026
b4a817a
Merge commit '/pull/1576'
JATAYU000 Jan 30, 2026
1913c10
add 'get_api_config' skeleton method
SimonBlanke Jan 30, 2026
7681949
remove 'APISettings'
SimonBlanke Jan 30, 2026
01840a5
impl. 'get_api_config'
SimonBlanke Jan 30, 2026
26ed4c1
add singleton pattern for settings
SimonBlanke Jan 30, 2026
c588d0c
add 'reset_settings'
SimonBlanke Jan 30, 2026
b6ff720
remove unused code
SimonBlanke Jan 30, 2026
80d5afc
reimplement usage of v1 settings config
SimonBlanke Jan 30, 2026
f47112c
first try v2, fallback to v1 if not available
SimonBlanke Jan 30, 2026
d44cf3e
reimplement singelton without the use of 'global'
SimonBlanke Jan 30, 2026
ea7dda1
add explanations
SimonBlanke Jan 30, 2026
f0e5947
change usage of settings to new impl.
SimonBlanke Jan 30, 2026
edcd006
add explanations
SimonBlanke Jan 30, 2026
cde0aae
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 30, 2026
aa1e560
move to config: APIVersion, ResourceType
geetu040 Feb 1, 2026
06b8497
remove api_context entirely
geetu040 Feb 1, 2026
384da91
major refactor
geetu040 Feb 1, 2026
1878138
more refactoring with setup/
geetu040 Feb 2, 2026
dc26e01
implement APIBackend as controller
geetu040 Feb 2, 2026
e2d059b
move enums
geetu040 Feb 2, 2026
d156ad4
module level imports
geetu040 Feb 2, 2026
d7a3788
module level import for _backend
geetu040 Feb 2, 2026
b5b9ef6
module level import for tests
geetu040 Feb 2, 2026
68820fe
Merge branch 'main' into migration
geetu040 Feb 2, 2026
7288284
New tests,cache exception in download
JATAYU000 Feb 2, 2026
567eca4
add test: test_tag_and_untag
geetu040 Feb 2, 2026
d23790b
Merge, post fix
JATAYU000 Feb 2, 2026
95e8890
md5_checksum, and request fix
JATAYU000 Feb 2, 2026
16c9251
Merge branch 'openml:main' into dataset_resource
JATAYU000 Feb 2, 2026
23fe19b
Merge commit /pull/1576
JATAYU000 Feb 2, 2026
be29dc9
Merge branch 'dataset_resource'
JATAYU000 Feb 2, 2026
b2287c3
implement get/set_config_values
geetu040 Feb 3, 2026
b7e285e
improve APIBackend.set_config_values
geetu040 Feb 3, 2026
fd43c48
use LegacyConfig
geetu040 Feb 3, 2026
f4aab6b
Revert "use LegacyConfig"
geetu040 Feb 3, 2026
d43cf86
implement _sync_api_config
geetu040 Feb 3, 2026
3e323ed
update tests with _sync_api_config
geetu040 Feb 3, 2026
9195fa6
rename config: timeout -> timeout_seconds
geetu040 Feb 3, 2026
5342eec
use timedelta for default ttl value
geetu040 Feb 3, 2026
adc0e74
update tests, adds v2/fallback
geetu040 Feb 3, 2026
bfb2d3e
add MinIOClient in TestBase
geetu040 Feb 3, 2026
707e1f1
publish,tag methods need testing
JATAYU000 Feb 3, 2026
cabaecf
fix linting for builder
geetu040 Feb 3, 2026
79cf49c
new migration tests
JATAYU000 Feb 3, 2026
5c8791a
Merge /1576
JATAYU000 Feb 3, 2026
85c1113
fix unbound variables: "code", "message"
geetu040 Feb 4, 2026
39bf86a
use requests.Session()
geetu040 Feb 4, 2026
7b66677
remove "timeout_seconds" entirely
geetu040 Feb 4, 2026
d2224c4
update/refactor tests
geetu040 Feb 4, 2026
9608c36
remove unused current_api_version from TestAPIBase
geetu040 Feb 5, 2026
f6bc7f7
make TestAPIBase inherit TestBase
geetu040 Feb 5, 2026
baa3a38
nits: test classes
geetu040 Feb 5, 2026
29c93d1
Review changes, new tests
JATAYU000 Feb 5, 2026
7674b3a
Merge bse migration
JATAYU000 Feb 5, 2026
ddb0774
Doc strings
JATAYU000 Feb 5, 2026
52b93fe
minor fix in _sync_api_config
geetu040 Feb 6, 2026
ec9477f
chore: rerun CI
geetu040 Feb 6, 2026
cea6188
delete mock, decorator
JATAYU000 Feb 9, 2026
3d4e84d
Merge base
JATAYU000 Feb 9, 2026
839bd33
delete url in test
JATAYU000 Feb 9, 2026
8417349
New test design
JATAYU000 Feb 10, 2026
10d134a
remove duplicates in _api/resources/__init__.py
geetu040 Feb 10, 2026
935f0f4
implement HTTPClient.download and add tests
geetu040 Feb 10, 2026
9514df8
add docstrings
geetu040 Feb 11, 2026
09f9ad6
Review changes
JATAYU000 Feb 12, 2026
0b52427
Merge base pr
JATAYU000 Feb 12, 2026
53bee94
update minio
geetu040 Feb 12, 2026
33b4ca0
make delay functions static
geetu040 Feb 13, 2026
a6b9a45
rename: retry_raise_e -> exception
geetu040 Feb 13, 2026
f924b32
use context-manager for requests.Session
geetu040 Feb 13, 2026
541b0f2
remove "assert response is not None"
geetu040 Feb 13, 2026
acb173f
verify checksum before caching
geetu040 Feb 13, 2026
3e8d1f0
update tests
geetu040 Feb 13, 2026
f83bdb5
minor fix in ResourceV1API.untag
geetu040 Feb 13, 2026
969c7d8
Merge branch 'main' into dataset_resource
JATAYU000 Feb 16, 2026
2a42712
remove cache.ttl
geetu040 Feb 16, 2026
001caad
replace config.cache.dir with config.cache_dir
geetu040 Feb 16, 2026
fb38a2d
make HTTPClient.cache compulsory
geetu040 Feb 17, 2026
03c4ca9
remove unused OpenMLCacheRequiredError
geetu040 Feb 17, 2026
8d708fd
implement and use TestAPIBase._create_resource
geetu040 Feb 17, 2026
4f75bba
make ResourceAPI.minio compulsory
geetu040 Feb 17, 2026
164f66f
Merge branch 'main' into migration
geetu040 Feb 17, 2026
c4dae43
rename: use_cache -> enable_cache; reset_cache -> refresh_cache
geetu040 Feb 17, 2026
36c20a2
use server config from TestBase
geetu040 Feb 17, 2026
ab3c1eb
tests: mock HTTP post calls to prevent race conditions
geetu040 Feb 17, 2026
0fc3c74
Merge bse_migration into dataset_resource
JATAYU000 Feb 17, 2026
741a66b
rename cache params
JATAYU000 Feb 17, 2026
81dff8d
Merge branch 'dataset_resource'
JATAYU000 Feb 17, 2026
27ac86f
Minio assertions, other reviews
JATAYU000 Feb 17, 2026
2a488ca
Merge branch 'main' into migration
geetu040 Feb 18, 2026
cbc7194
Merge base migration
JATAYU000 Feb 18, 2026
599c7e1
remove hardcoded server in TestHTTPClient.test_cache
geetu040 Feb 18, 2026
2867862
fix docstring in _resolve_default_cache_dir
geetu040 Feb 18, 2026
f09f3cd
fix docstring in ResourceAPI
geetu040 Feb 18, 2026
5f731ce
remove duplicates in __all__
geetu040 Feb 18, 2026
bad7842
remove ttl related code/docs
geetu040 Feb 18, 2026
aefdb38
remove delay methods in HTTPClient
geetu040 Feb 18, 2026
0f40b02
minor fix in _resolve_default_cache_dir
geetu040 Feb 18, 2026
7ac1672
update FallbackProxy
geetu040 Feb 18, 2026
6ac1dfe
simplify _backend creation
geetu040 Feb 18, 2026
62924c9
Merge branch 'main' into migration
geetu040 Feb 18, 2026
27696bb
req changes
satvshr Feb 20, 2026
190face
resolve conflicts
satvshr Feb 20, 2026
95daaa6
remove old config file
satvshr Feb 20, 2026
7841ea8
added OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR
satvshr Feb 20, 2026
cc515aa
bug fixing
satvshr Feb 20, 2026
e6a92df
armagh fix
satvshr Feb 20, 2026
1b8c22a
update content_type check
geetu040 Feb 20, 2026
fc839a6
Revert "make delay functions static"
geetu040 Feb 20, 2026
1c922af
Revert "remove delay methods in HTTPClient"
geetu040 Feb 20, 2026
ffa9ce9
Merge branch 'main' into migration
geetu040 Feb 20, 2026
a7b2d21
allow api_key=None
geetu040 Feb 20, 2026
27fe790
add tests for api_key=None
geetu040 Feb 20, 2026
8965112
update cache not found message
geetu040 Feb 23, 2026
72ea1a4
update docs for path in HTTPCache
geetu040 Feb 23, 2026
a696c49
remove elapsed from cached meta
geetu040 Feb 23, 2026
755636d
move self.headers to _HEADERS
geetu040 Feb 23, 2026
d07af34
fix indentation in docstrings of _resolve_default_cache_dir
geetu040 Feb 23, 2026
2d9c8ec
Update openml/_api/clients/http.py
geetu040 Feb 23, 2026
002b989
Merge branch 'main' into migration
geetu040 Feb 23, 2026
045d896
move _handle_delete_exception and_get_endpoint_name, legal_resources
geetu040 Feb 23, 2026
c437966
set HTTPClient.headers
geetu040 Feb 23, 2026
e27470a
remove main_tag
geetu040 Feb 23, 2026
d04d956
remove and merge TestAPIBase into TestBase
geetu040 Feb 23, 2026
9263f7f
minor change in TestHTTPClient.test_cache
geetu040 Feb 23, 2026
79dea29
make HTTPClient.request private
geetu040 Feb 23, 2026
f6497c2
Revert "update FallbackProxy"
geetu040 Feb 23, 2026
dce7f54
use st_ctime instead of st_ctime for cache refresh test
geetu040 Feb 23, 2026
40dd460
Merge branch 'main' into issue1564
geetu040 Feb 24, 2026
0fc917c
majore config refactor
geetu040 Feb 24, 2026
3d86b18
Merge branch 'pr-1577' into migration
geetu040 Feb 24, 2026
aba3d3e
update _config.py
geetu040 Feb 24, 2026
d99d54d
update test_openml_cache_dir_env_var
geetu040 Feb 24, 2026
dc22e3a
fix mutable SERVERS_REGISTRY
geetu040 Feb 25, 2026
7318573
update set_api_version for fallback
geetu040 Feb 25, 2026
29ef187
minor fix
geetu040 Feb 25, 2026
cf94c89
fixes for test_config
geetu040 Feb 25, 2026
298fbda
fixes in conftest urls
geetu040 Feb 25, 2026
9870502
update test_http.py
geetu040 Feb 25, 2026
33065c2
undo changes with test_openml_cache_dir_env_var
geetu040 Feb 25, 2026
76b92bb
fix server mode in test_config.py
geetu040 Feb 25, 2026
419edcb
move _HEADERS to confing
geetu040 Feb 25, 2026
cb6d937
add fixtures for migration tests
geetu040 Feb 25, 2026
8544c8a
update test_http.py with fixtures
geetu040 Feb 25, 2026
d4c413b
update test_versions.py
geetu040 Feb 25, 2026
fab1a15
update test_versions.py
geetu040 Feb 25, 2026
6392be8
Merge base-migration
JATAYU000 Feb 25, 2026
276324a
fix error message in HTTPClient.server
geetu040 Feb 26, 2026
73f7594
fixes in test_versions.py: use DummyTaskAPI instead of TaskAPI
geetu040 Feb 26, 2026
2ee7fa3
add clients in openml._backend
geetu040 Feb 26, 2026
4f37607
skip parquet env var
JATAYU000 Feb 26, 2026
c74754a
Merge base-migration
JATAYU000 Feb 26, 2026
2473208
Updated test,admin fixture
JATAYU000 Feb 26, 2026
7afb0e3
code qulity Reviews
JATAYU000 Feb 26, 2026
3b96559
Test fixes
JATAYU000 Feb 26, 2026
ea80785
remove unnecessary
JATAYU000 Feb 26, 2026
83a2e80
Fix mock delete
JATAYU000 Feb 26, 2026
9eb6c90
Exception review
JATAYU000 Feb 26, 2026
4be5bbd
fixes with openml.config.[server|apikey] leakage
geetu040 Feb 26, 2026
9027c01
remove unused fixtures: use_api_[v1|v2]
geetu040 Feb 26, 2026
c1efdeb
Merge base-mgration
JATAYU000 Feb 27, 2026
dd048d5
mock requests
JATAYU000 Feb 27, 2026
98041ed
skip v2 test for now
JATAYU000 Feb 27, 2026
e5461a9
add more config tests
geetu040 Feb 27, 2026
7d899a9
make SERVERS_REGISTRY private
geetu040 Feb 27, 2026
8587414
fix marker: uses_test_server->test_server
geetu040 Feb 27, 2026
23a3450
fix UserWarning
geetu040 Feb 27, 2026
ac28f82
update fixture: with_server
geetu040 Feb 27, 2026
4a66245
req changes
satvshr Mar 2, 2026
c762fb4
Merge branch 'issue1564' of https://github.com/satvshr/openml-python …
satvshr Mar 2, 2026
77c21f2
Update openml/_api/clients/http.py
geetu040 Mar 4, 2026
eac24fc
Update tests/test_api/test_http.py
geetu040 Mar 4, 2026
2ed65fe
update test_get_uses_cached_response
geetu040 Mar 4, 2026
f3b07de
test_get_with_api_key
geetu040 Mar 4, 2026
29db3f1
use .arff instead of .bin in tests
geetu040 Mar 4, 2026
3b4e538
update test_download_creates_file to use md5_checksum
geetu040 Mar 4, 2026
8ac886b
update test_download_is_cached_on_disk
geetu040 Mar 4, 2026
305f4f0
update APIBackendBuilder
geetu040 Mar 4, 2026
b2bf164
Merge branch 'main' into migration
geetu040 Mar 4, 2026
e97e6c2
Update openml/_api/clients/http.py
geetu040 Mar 4, 2026
c66d73c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 4, 2026
aa54e8e
pre-commit fixes
geetu040 Mar 4, 2026
2d452d3
Merge branch 'main' into issue1564
geetu040 Mar 6, 2026
c235812
Merge branch 'main' into issue1564
fkiraly Mar 6, 2026
39eb823
Trigger CI
satvshr Mar 6, 2026
50eed37
Merge branch 'main' into migration
geetu040 Mar 6, 2026
7a000eb
Merge branch 'main' into issue1564
geetu040 Mar 10, 2026
79f6187
Merge branch 'main' into issue1564
geetu040 Mar 10, 2026
b1a9e7f
Merge branch 'pr-1577' into migration (merge conflicts)
geetu040 Mar 12, 2026
d716ecf
update server methods in config
geetu040 Mar 12, 2026
3c29e71
fix api-version leakage in tests
geetu040 Mar 12, 2026
b4ff0b2
remove unused migration code
geetu040 Mar 12, 2026
93155ee
debug ci: separate cache for each test-case
geetu040 Mar 12, 2026
d3cc9a7
update port for localhost
geetu040 Mar 12, 2026
a6b82f4
Revert "debug ci: separate cache for each test-case"
geetu040 Mar 12, 2026
3419973
rerun CI
geetu040 Mar 12, 2026
8de99b7
Merge branch 'main' into migration
geetu040 Mar 12, 2026
d0202b0
Merge base migration
JATAYU000 Mar 13, 2026
0fa9e3b
Fix tests for new test setup
JATAYU000 Mar 16, 2026
7d61107
create enum ServerMode
geetu040 Mar 16, 2026
1ecbbba
update config for ServerMode
geetu040 Mar 16, 2026
65472ed
update tests for ServerMode
geetu040 Mar 16, 2026
9219266
Update status_update
JATAYU000 Mar 17, 2026
44b48b5
udpate apikey in _TEST_SERVERS_LOCAL
geetu040 Mar 17, 2026
11b19de
skip v2 status_update
JATAYU000 Mar 18, 2026
4df12d3
Merge base migration
JATAYU000 Mar 18, 2026
77d2af2
skip v2 status_update
JATAYU000 Mar 18, 2026
04bc83b
fix: remove duplicate server name in cache path
geetu040 Mar 23, 2026
f926092
test: remove check for ":" since windows CI expects it
geetu040 Mar 23, 2026
8072e34
adds marker
JATAYU000 Mar 23, 2026
47464e9
Merge base migration
JATAYU000 Mar 23, 2026
f059e71
switch 1 worker test
JATAYU000 Mar 23, 2026
b6d5e31
Merge main
JATAYU000 Mar 23, 2026
4ee28f1
reduce test workers
JATAYU000 Mar 23, 2026
509b4c3
revert workers, static patch
JATAYU000 Mar 24, 2026
6385597
fixes
JATAYU000 Mar 24, 2026
5fea9c9
exists check in load
JATAYU000 Mar 24, 2026
4b43003
updating cache tests
JATAYU000 Mar 24, 2026
f01db35
Revert "updating cache tests"
JATAYU000 Mar 24, 2026
e10d776
fixes
JATAYU000 Mar 24, 2026
ba7edd8
update lazy_behaviour tests, helper functions on paths
JATAYU000 Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openml/_api/clients/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ def delete(
def download(
self,
url: str,
handler: Callable[[Response, Path, str], None] | None = None,
handler: Callable[[Response, Path, str], Path | None] | None = None,
encoding: str = "utf-8",
file_name: str = "response.txt",
md5_checksum: str | None = None,
Expand Down
133 changes: 132 additions & 1 deletion openml/_api/clients/minio.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from __future__ import annotations

import contextlib
import shutil
import urllib
import urllib.parse
import zipfile
from pathlib import Path

import minio
import requests
from urllib3 import ProxyManager

import openml
from openml.utils import ProgressBar


class MinIOClient:
Expand All @@ -16,13 +25,135 @@ class MinIOClient:

Attributes
----------
path : pathlib.Path or None
path : pathlib.Path
Configured base path for storage operations.
headers : dict of str to str
Default HTTP headers, including a user-agent identifying the
OpenML Python client version.
"""

@property
def headers(self) -> dict[str, str]:
    """Return the default HTTP headers stored in ``openml.config._HEADERS``."""
    default_headers = openml.config._HEADERS
    return default_headers

@property
def path(self) -> Path:
    """Return the configured OpenML cache directory as a ``Path``.

    The value is read from ``openml.config.get_cache_directory()`` on every
    access, so it follows later changes to the configuration.
    """
    cache_directory = openml.config.get_cache_directory()
    return Path(cache_directory)

def _get_path(self, url: str) -> Path:
    """Map ``url`` to a local path below ``<base path>/minio``.

    Only the path component of the URL is used. Leading slashes are stripped
    so the result is nested under the ``minio`` directory instead of being
    treated as an absolute path.
    """
    url_path = urllib.parse.urlparse(url).path
    relative_part = url_path.lstrip("/")
    minio_root = self.path / "minio"
    return minio_root / relative_part

def download_minio_file(
    self,
    source: str,
    destination: str | Path | None = None,
    exists_ok: bool = True,  # noqa: FBT002
    proxy: str | None = "auto",
) -> Path:
    """Download file ``source`` from a MinIO bucket and store it at ``destination``.

    If the downloaded file has a ``.zip`` suffix, its content is additionally
    extracted into the directory that contains the file.

    Parameters
    ----------
    source : str
        URL to a file in a MinIO bucket. The URL path is expected to have the
        format ``/BUCKET/path/to/file.ext``.
    destination : str | Path | None, optional (default=None)
        Path to store the file to. If an existing directory is provided, the
        object name (the URL path after the bucket) is appended to it. If None,
        the location is derived from ``source`` via ``_get_path``.
    exists_ok : bool, optional (default=True)
        If False, raise FileExistsError if a file already exists at ``destination``.
    proxy : str, optional (default="auto")
        The proxy server to use. By default it's "auto" which uses ``requests`` to
        automatically find the proxy to use. Pass None or the environment variable
        ``no_proxy="*"`` to disable proxies.

    Returns
    -------
    pathlib.Path
        Path of the downloaded file.

    Raises
    ------
    ValueError
        If the path of ``source`` does not contain both a bucket and an object name.
    FileExistsError
        If ``exists_ok`` is False and the destination file already exists.
    FileNotFoundError
        If MinIO reports an ``S3Error`` (missing object, missing or private bucket).
    """
    parsed_url = urllib.parse.urlparse(source)

    # expect path format: /BUCKET/path/to/file.ext
    try:
        bucket, object_name = parsed_url.path[1:].split("/", maxsplit=1)
    except ValueError:
        raise ValueError(
            f"Expected a URL with path format '/BUCKET/path/to/file.ext', got '{source}'."
        ) from None

    destination = self._get_path(source) if destination is None else Path(destination)
    # Expand "~" before touching the filesystem; otherwise the directory and
    # existence checks below would inspect the literal, unexpanded path.
    destination = destination.expanduser()
    if destination.is_dir():
        destination = destination / object_name
    if destination.is_file() and not exists_ok:
        raise FileExistsError(f"File already exists in {destination}.")

    destination.parent.mkdir(parents=True, exist_ok=True)

    if proxy == "auto":
        resolved_proxies = requests.utils.get_environ_proxies(parsed_url.geturl())
        proxy = requests.utils.select_proxy(parsed_url.geturl(), resolved_proxies)  # type: ignore

    proxy_client = ProxyManager(proxy) if proxy else None

    client = minio.Minio(endpoint=parsed_url.netloc, secure=False, http_client=proxy_client)
    try:
        client.fget_object(
            bucket_name=bucket,
            object_name=object_name,
            file_path=str(destination),
            progress=ProgressBar() if openml.config.show_progress else None,
            request_headers=self.headers,
        )
        if destination.is_file() and destination.suffix == ".zip":
            with zipfile.ZipFile(destination, "r") as zip_ref:
                zip_ref.extractall(destination.parent)

    except minio.error.S3Error as e:
        if e.message is not None and e.message.startswith("Object does not exist"):
            raise FileNotFoundError(f"Object at '{source}' does not exist.") from e
        # e.g. permission error, or a bucket does not exist (which is also interpreted as a
        # permission error on minio level).
        raise FileNotFoundError("Bucket does not exist or is private.") from e

    return destination

def download_minio_bucket(self, source: str, destination: str | Path | None = None) -> None:
    """Download the objects below ``source`` in a MinIO bucket into ``destination``.

    Does not redownload files which already exist. For every listed object a
    marker file named after the object's etag is looked up in ``destination``;
    objects whose marker exists are skipped.

    Parameters
    ----------
    source : str
        URL to a MinIO bucket. The URL path is expected to have the format
        ``/BUCKET/path/to/file.ext``; the last path segment is dropped and the
        segments between bucket and last segment form the listing prefix.
    destination : str | Path | None, optional (default=None)
        Path to a directory to store the bucket content in. If the path has a
        suffix, its parent directory is used. If None, the location is derived
        from ``source`` via ``_get_path``.

    Raises
    ------
    ValueError
        If a listed object has no name or no etag, or if the path of ``source``
        has fewer than two segments.
    """
    destination = self._get_path(source) if destination is None else Path(destination)
    parsed_url = urllib.parse.urlparse(source)
    if destination.suffix:
        destination = destination.parent
    # expect path format: /BUCKET/path/to/file.ext
    _, bucket, *prefixes, _ = parsed_url.path.split("/")
    prefix = "/".join(prefixes)
    # Everything up to the last "/" of the source URL; object file names are appended to it.
    source_base = source.rsplit("/", 1)[0]

    client = minio.Minio(endpoint=parsed_url.netloc, secure=False)

    for file_object in client.list_objects(bucket, prefix=prefix, recursive=True):
        if file_object.object_name is None:
            raise ValueError(f"Object name is None for object {file_object!r}")
        if file_object.etag is None:
            raise ValueError(f"Object etag is None for object {file_object!r}")

        marker = destination / file_object.etag
        if marker.exists():
            continue

        # Take the last segment with [-1] so object names without any "/"
        # (objects at the bucket root) do not raise IndexError.
        file_name = file_object.object_name.rsplit("/", 1)[-1]
        file_destination = destination / file_name
        extracted_dir = file_destination.parent / file_destination.stem
        # Only remove directories: for a file without suffix the stem equals the
        # file itself, and ``shutil.rmtree`` fails on regular files.
        if extracted_dir.is_dir():
            # Marker is missing but archive exists means the server archive changed
            # force a refresh
            shutil.rmtree(extracted_dir)

        with contextlib.suppress(FileExistsError):
            self.download_minio_file(
                source=f"{source_base}/{file_name}",
                destination=file_destination,
                exists_ok=False,
            )

        if file_destination.is_file() and file_destination.suffix == ".zip":
            # The archive was extracted by ``download_minio_file``; drop the zip and
            # remember its etag so the object is skipped next time.
            # NOTE(review): the marker is written for .zip archives only - confirm
            # that this nesting is intended.
            file_destination.unlink()
            marker.touch()
113 changes: 113 additions & 0 deletions openml/_api/resources/base/resources.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
from __future__ import annotations

import builtins
from abc import abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

if TYPE_CHECKING:
import pandas as pd

from openml.datasets.dataset import OpenMLDataFeature, OpenMLDataset
from openml.enums import ResourceType

from .base import ResourceAPI
Expand All @@ -10,6 +19,110 @@ class DatasetAPI(ResourceAPI):

resource_type: ResourceType = ResourceType.DATASET

@abstractmethod
def get(  # noqa: PLR0913
    self,
    dataset_id: int,
    download_data: bool = False,  # noqa: FBT002
    cache_format: Literal["pickle", "feather"] = "pickle",
    download_qualities: bool = False,  # noqa: FBT002
    download_features_meta_data: bool = False,  # noqa: FBT002
    download_all_files: bool = False,  # noqa: FBT002
    force_refresh_cache: bool = False,  # noqa: FBT002
) -> OpenMLDataset:
    """Return the dataset identified by ``dataset_id`` as an ``OpenMLDataset``.

    Abstract interface method; no behaviour is defined here.

    NOTE(review): the ``download_*`` flags, ``cache_format`` and
    ``force_refresh_cache`` presumably control which artefacts are fetched
    and how they are cached - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def list(
    self,
    limit: int,
    offset: int,
    *,
    data_id: builtins.list[int] | None = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Return a listing of datasets as a ``pandas.DataFrame``.

    Abstract interface method; no behaviour is defined here.

    Parameters
    ----------
    limit : int
        Presumably the maximum number of datasets to return - confirm
        against the concrete implementations.
    offset : int
        Presumably the number of datasets to skip - confirm against the
        concrete implementations.
    data_id : list of int, optional
        Presumably restricts the listing to these dataset IDs - confirm.
    **kwargs : Any
        Additional, implementation-defined filters.
    """
    # ``builtins.list`` is used in the annotation because the bare name ``list``
    # refers to this method inside the class body.
    ...
Comment on lines +35 to +42
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not have the same signature for all three methods: DatasetsAPI.list, DatasetsV1.list, DatasetsV2.list? Does it raise pre-commit failures because some parameters might be unused?

Copy link
Contributor Author

@JATAYU000 JATAYU000 Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that v2 signature was experimental; I don't know how pre-commit did not catch that. I will make them the same.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is mypy supposed to catch that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes unused parameters are caught under #ARG001 as seen in the cache_directory params.


@abstractmethod
def edit(  # noqa: PLR0913
    self,
    dataset_id: int,
    description: str | None = None,
    creator: str | None = None,
    contributor: str | None = None,
    collection_date: str | None = None,
    language: str | None = None,
    default_target_attribute: str | None = None,
    ignore_attribute: str | builtins.list[str] | None = None,
    citation: str | None = None,
    row_id_attribute: str | None = None,
    original_data_url: str | None = None,
    paper_url: str | None = None,
) -> int:
    """Edit metadata fields of the dataset ``dataset_id``.

    Abstract interface method; no behaviour is defined here. Every field
    defaults to None.

    NOTE(review): presumably a field left as None is not changed and the
    returned int is a dataset ID - confirm against the concrete
    implementations.
    """
    # ``builtins.list`` is used in the annotation because the bare name ``list``
    # refers to the ``list`` method inside the class body.
    ...

@abstractmethod
def fork(self, dataset_id: int) -> int:
    """Fork the dataset ``dataset_id``.

    Abstract interface method. Returns an int, presumably the ID of the
    forked dataset - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def status_update(self, dataset_id: int, status: Literal["active", "deactivated"]) -> None:
    """Update the status of dataset ``dataset_id``.

    Abstract interface method. ``status`` is limited to ``"active"`` or
    ``"deactivated"`` by its type annotation.
    """
    ...

@abstractmethod
def list_qualities(self) -> builtins.list[str]:
    """Return a list of strings, presumably the names of the available qualities.

    Abstract interface method - confirm the exact content against the
    concrete implementations.
    """
    ...

@abstractmethod
def feature_add_ontology(self, dataset_id: int, index: int, ontology: str) -> bool:
    """Add ``ontology`` to a feature of dataset ``dataset_id``.

    Abstract interface method. ``index`` presumably selects the feature and
    the returned bool presumably signals success - confirm against the
    concrete implementations.
    """
    ...

@abstractmethod
def feature_remove_ontology(self, dataset_id: int, index: int, ontology: str) -> bool:
    """Remove ``ontology`` from a feature of dataset ``dataset_id``.

    Abstract interface method. ``index`` presumably selects the feature and
    the returned bool presumably signals success - confirm against the
    concrete implementations.
    """
    ...

@abstractmethod
def get_features(self, dataset_id: int) -> dict[int, OpenMLDataFeature]:
    """Return the features of dataset ``dataset_id``.

    Abstract interface method. The result maps an int (presumably the
    feature index - confirm) to an ``OpenMLDataFeature``.
    """
    ...

@abstractmethod
def get_qualities(self, dataset_id: int) -> dict[str, float] | None:
    """Return the qualities of dataset ``dataset_id``, or None.

    Abstract interface method. The result maps a string (presumably the
    quality name - confirm) to a float. When None is returned is defined by
    the concrete implementations.
    """
    ...

@abstractmethod
def parse_features_file(
    self, features_file: Path, features_pickle_file: Path
) -> dict[int, OpenMLDataFeature]:
    """Parse ``features_file`` into a mapping of int to ``OpenMLDataFeature``.

    Abstract interface method.

    NOTE(review): ``features_pickle_file`` is presumably a pickle cache of
    the parsed result - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def parse_qualities_file(
    self, qualities_file: Path, qualities_pickle_file: Path
) -> dict[str, float]:
    """Parse ``qualities_file`` into a mapping of string to float.

    Abstract interface method.

    NOTE(review): ``qualities_pickle_file`` is presumably a pickle cache of
    the parsed result - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def _download_file(self, url_ext: str, file_path: str, encoding: str = "utf-8") -> Path:
    """Download a file and return a ``Path``.

    Abstract interface method.

    NOTE(review): presumably ``url_ext`` is the URL part appended to the
    server base URL, ``file_path`` the target location and the returned path
    the stored file - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def download_features_file(self, dataset_id: int) -> Path:
    """Download the features file of dataset ``dataset_id``.

    Abstract interface method. Returns a ``Path``, presumably the location
    of the downloaded file - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def download_qualities_file(self, dataset_id: int) -> Path:
    """Download the qualities file of dataset ``dataset_id``.

    Abstract interface method. Returns a ``Path``, presumably the location
    of the downloaded file - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def download_dataset_parquet(
    self,
    description: dict | OpenMLDataset,
    download_all_files: bool = False,  # noqa: FBT002
) -> Path | None:
    """Download the parquet file of the dataset described by ``description``.

    Abstract interface method. ``description`` is either a dict or an
    ``OpenMLDataset``. Returns a ``Path`` or None.

    NOTE(review): presumably None means no parquet file could be obtained,
    and ``download_all_files`` also fetches the other files stored next to
    it - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def download_dataset_arff(
    self,
    description: dict | OpenMLDataset,
) -> Path:
    """Download the ARFF file of the dataset described by ``description``.

    Abstract interface method. ``description`` is either a dict or an
    ``OpenMLDataset``. Returns a ``Path``, presumably the location of the
    downloaded file - confirm against the concrete implementations.
    """
    ...

@abstractmethod
def add_topic(self, dataset_id: int, topic: str) -> int:
    """Add ``topic`` to dataset ``dataset_id``.

    Abstract interface method. Returns an int, presumably the dataset ID -
    confirm against the concrete implementations.
    """
    ...

@abstractmethod
def delete_topic(self, dataset_id: int, topic: str) -> int:
    """Delete ``topic`` from dataset ``dataset_id``.

    Abstract interface method. Returns an int, presumably the dataset ID -
    confirm against the concrete implementations.
    """
    ...

@abstractmethod
def get_online_dataset_format(self, dataset_id: int) -> str:
    """Return the format of dataset ``dataset_id`` as a string.

    Abstract interface method.

    NOTE(review): judging by the name the value is looked up on the server
    rather than in the local cache - confirm against the concrete
    implementations.
    """
    ...

@abstractmethod
def get_online_dataset_arff(self, dataset_id: int) -> str | None:
    """Return the ARFF of dataset ``dataset_id`` as a string, or None.

    Abstract interface method.

    NOTE(review): judging by the name the content is fetched from the server
    rather than from the local cache; when None is returned is defined by
    the concrete implementations - confirm.
    """
    ...


class TaskAPI(ResourceAPI):
"""Abstract API interface for task resources."""
Expand Down
Loading
Loading