
Commit ede1497

Authored by PGijsbers and geetu040
[ENH] Allow using a local test server (#1630)
Update the tests to allow connecting to a local test server instead of a remote one (requires openml/services#13).

Running the tests locally:

- Start the services locally (as defined in openml/services#13) using `docker compose --profile "rest-api" --profile "evaluation-engine" up -d`. Startup can take a few minutes, as the PHP container currently builds the ES indices from scratch. I noticed that the `start_period` for some services isn't sufficient on my M1 Mac, possibly because some containers require Rosetta, which slows them down. You can recognize this by services reporting "Error" while the container keeps running. To avoid this, either increase the `start_period` of those services (mostly Elasticsearch and the PHP API) or simply run the command again (the healthy services are then already up, and the services that depend on them can start successfully). The following containers should run: openml-test-database, openml-php-rest-api, openml-nginx, openml-evaluation-engine, openml-elasticsearch, openml-minio.
- Set the `TEST_SERVER_URL` variable in `openml/config.py` to `"http://localhost:8000"` (a minimal sketch of the resulting configuration follows below).
- Run the tests: `python -m pytest -m "not production" tests`.

This PR builds on the unmerged PR #1620.

Co-authored-by: Armaghan Shakir <raoarmaghanshakir040@gmail.com>
Parent: 099a1dc · Commit: ede1497
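To make the configuration step concrete, here is a minimal sketch (not part of this diff) of what the client ends up targeting once `TEST_SERVER_URL` has been edited as described; the local URL and port are assumptions taken from the instructions above.

```python
# Minimal sketch, assuming openml/config.py was edited so that
# TEST_SERVER_URL = "http://localhost:8000" as described above.
import openml

openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
openml.config.apikey = openml.config._TEST_SERVER_NORMAL_USER_KEY  # "normaluser" on the test stack

print(openml.config.server)                 # e.g. http://localhost:8000/api/v1/xml
print(openml.config.get_cache_directory())  # ends in "localhost_8000" after the config.py change below
```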

File tree: 11 files changed (+56, -68 lines)


openml/cli.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def check_server(server: str) -> str:
 
     def replace_shorthand(server: str) -> str:
         if server == "test":
-            return "https://test.openml.org/api/v1/xml"
+            return f"{config.TEST_SERVER_URL}/api/v1/xml"
         if server == "production_server":
             return "https://www.openml.org/api/v1/xml"
         return server

openml/config.py

Lines changed: 5 additions & 2 deletions
@@ -28,6 +28,8 @@
 OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
 _TEST_SERVER_NORMAL_USER_KEY = "normaluser"
 
+TEST_SERVER_URL = "https://test.openml.org"
+
 
 class _Config(TypedDict):
     apikey: str

@@ -214,7 +216,7 @@ class ConfigurationForExamples:
     _last_used_server = None
     _last_used_key = None
     _start_last_called = False
-    _test_server = "https://test.openml.org/api/v1/xml"
+    _test_server = f"{TEST_SERVER_URL}/api/v1/xml"
     _test_apikey = _TEST_SERVER_NORMAL_USER_KEY
 
     @classmethod

@@ -470,7 +472,8 @@ def get_cache_directory() -> str:
 
     """
     url_suffix = urlparse(server).netloc
-    reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1])  # noqa: PTH118
+    url_parts = url_suffix.replace(":", "_").split(".")[::-1]
+    reversed_url_suffix = os.sep.join(url_parts)  # noqa: PTH118
     return os.path.join(_root_cache_directory, reversed_url_suffix)  # noqa: PTH118
 
 

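For context, a small sketch of what the `get_cache_directory` change above does: the netloc of the configured server may now contain a port (for example `localhost:8000`), so the colon is replaced with an underscore before the domain parts are reversed into a cache sub-path. The example URLs below are assumptions; the derivation mirrors the diff.

```python
# Sketch of the cache sub-path derivation from the updated get_cache_directory().
import os
from urllib.parse import urlparse

for server in ("https://test.openml.org/api/v1/xml", "http://localhost:8000/api/v1/xml"):
    url_suffix = urlparse(server).netloc                       # "test.openml.org" / "localhost:8000"
    url_parts = url_suffix.replace(":", "_").split(".")[::-1]
    print(os.sep.join(url_parts))                              # "org/openml/test" / "localhost_8000" on POSIX
```

This is presumably also why the new `tests/files/localhost_8000` entry further down points to `org/openml/test`: the static test fixtures then resolve for a locally hosted server as well.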
openml/tasks/functions.py

Lines changed: 6 additions & 5 deletions
@@ -415,9 +415,10 @@ def get_task(
     if not isinstance(task_id, int):
         raise TypeError(f"Task id should be integer, is {type(task_id)}")
 
-    cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
-    tid_cache_dir = cache_key_dir / str(task_id)
-    tid_cache_dir_existed = tid_cache_dir.exists()
+    task_cache_directory = openml.utils._create_cache_directory_for_id(
+        TASKS_CACHE_DIR_NAME, task_id
+    )
+    task_cache_directory_existed = task_cache_directory.exists()
     try:
         task = _get_task_description(task_id)
         dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)

@@ -431,8 +432,8 @@ def get_task(
         if download_splits and isinstance(task, OpenMLSupervisedTask):
             task.download_split()
     except Exception as e:
-        if not tid_cache_dir_existed:
-            openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+        if not task_cache_directory_existed:
+            openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
         raise e
 
     return task

openml/testing.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
         "user": [],
     }
     flow_name_tracker: ClassVar[list[str]] = []
-    test_server = "https://test.openml.org/api/v1/xml"
+    test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
     admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
     user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY
 

tests/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -277,7 +277,7 @@ def with_server(request):
         openml.config.apikey = None
         yield
         return
    openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
-    openml.config.server = "https://test.openml.org/api/v1/xml"
+    openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
     openml.config.apikey = TestBase.user_key
     yield
 

tests/files/localhost_8000

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+org/openml/test

tests/test_datasets/test_dataset_functions.py

Lines changed: 13 additions & 24 deletions
@@ -527,27 +527,20 @@ def test_deletion_of_cache_dir(self):
     def test_deletion_of_cache_dir_faulty_download(self, patch):
         patch.side_effect = Exception("Boom!")
         self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
-        datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
+        datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
         assert len(os.listdir(datasets_cache_dir)) == 0
 
     @pytest.mark.test_server()
     def test_publish_dataset(self):
-        # lazy loading not possible as we need the arff-file.
-        openml.datasets.get_dataset(3, download_data=True)
-        file_path = os.path.join(
-            openml.config.get_cache_directory(),
-            "datasets",
-            "3",
-            "dataset.arff",
-        )
+        arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
         dataset = OpenMLDataset(
             "anneal",
             "test",
             data_format="arff",
             version=1,
             licence="public",
             default_target_attribute="class",
-            data_file=file_path,
+            data_file=arff_file_path,
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.dataset_id)

@@ -890,7 +883,7 @@ def test_create_invalid_dataset(self):
 
     @pytest.mark.test_server()
     def test_get_online_dataset_arff(self):
-        dataset_id = 100  # Australian
+        dataset_id = 128  # iris -- one of the few datasets without parquet file
         # lazy loading not used as arff file is checked.
         dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
         decoder = arff.ArffDecoder()

@@ -1468,8 +1461,9 @@ def test_data_edit_critical_field(self):
                 raise e
             time.sleep(10)
         # Delete the cache dir to get the newer version of the dataset
+
         shutil.rmtree(
-            os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
+            os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
         )
 
     @pytest.mark.test_server()

@@ -1734,7 +1728,6 @@ def test_delete_dataset(self):
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = (
         test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
     )

@@ -1749,14 +1742,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
     ):
         openml.datasets.delete_dataset(40_000)
 
-    dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+    dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
     assert dataset_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = (
         test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
     )

@@ -1771,14 +1763,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
     ):
         openml.datasets.delete_dataset(40_000)
 
-    dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+    dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
     assert dataset_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = (
         test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
     )

@@ -1790,14 +1781,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
     success = openml.datasets.delete_dataset(40000)
     assert success
 
-    dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
+    dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
     assert dataset_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = (
         test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
     )

@@ -1812,7 +1802,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
     ):
         openml.datasets.delete_dataset(9_999_999)
 
-    dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
+    dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
     assert dataset_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 

@@ -1907,9 +1897,8 @@ def _dataset_features_is_downloaded(did: int):
 
 
 def _dataset_data_file_is_downloaded(did: int):
-    parquet_present = _dataset_file_is_downloaded(did, "dataset.pq")
-    arff_present = _dataset_file_is_downloaded(did, "dataset.arff")
-    return parquet_present or arff_present
+    cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
+    return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
 
 
 def _assert_datasets_retrieved_successfully(

@@ -2014,7 +2003,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
         test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
     )
     # While the mocked example is from production, unit tests by default connect to the test server.
-    requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
+    requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
     dataset = openml.datasets.get_dataset(61, download_data=True)
     assert dataset._parquet_url is not None
     assert dataset.parquet_file is not None

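The reworked `_dataset_data_file_is_downloaded` helper above now treats any cached `.pq` or `.arff` file as a downloaded data file instead of probing two fixed filenames. A rough stand-alone equivalent (the function name and explicit cache-root parameter here are illustrative, not from the diff):

```python
# Illustrative sketch of the new check; also guards against a missing cache directory.
from pathlib import Path

def dataset_data_file_is_downloaded(cache_root: str, did: int) -> bool:
    """Return True if the dataset's cache holds a parquet or ARFF data file."""
    cache_directory = Path(cache_root) / "datasets" / str(did)
    if not cache_directory.is_dir():
        return False
    return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
```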
tests/test_flows/test_flow_functions.py

Lines changed: 5 additions & 10 deletions
@@ -453,7 +453,6 @@ def test_delete_flow(self):
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
     ):
         openml.flows.delete_flow(40_000)
 
-    flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+    flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
     assert flow_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
     ):
         openml.flows.delete_flow(40_000)
 
-    flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+    flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
     assert flow_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
     ):
         openml.flows.delete_flow(40_000)
 
-    flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
+    flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
     assert flow_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
     mock_delete.return_value = create_request_response(
         status_code=200,

@@ -523,15 +519,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
     success = openml.flows.delete_flow(33364)
     assert success
 
-    flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
+    flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
     assert flow_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
 def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
     ):
         openml.flows.delete_flow(9_999_999)
 
-    flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
+    flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
     assert flow_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

tests/test_openml/test_config.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ def test_get_config_as_dict(self):
        config = openml.config.get_config_as_dict()
        _config = {}
        _config["apikey"] = TestBase.user_key
-       _config["server"] = "https://test.openml.org/api/v1/xml"
+       _config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
        _config["cachedir"] = self.workdir
        _config["avoid_duplicate_runs"] = False
        _config["connection_n_retries"] = 20

tests/test_runs/test_run_functions.py

Lines changed: 7 additions & 6 deletions
@@ -1813,7 +1813,6 @@ def test_initialize_model_from_run_nonstrict(self):
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -1826,14 +1825,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
     ):
         openml.runs.delete_run(40_000)
 
-    run_url = "https://test.openml.org/api/v1/xml/run/40000"
+    run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000"
     assert run_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml"
     mock_delete.return_value = create_request_response(
         status_code=200,

@@ -1843,14 +1841,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
     success = openml.runs.delete_run(10591880)
     assert success
 
-    run_url = "https://test.openml.org/api/v1/xml/run/10591880"
+    run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880"
     assert run_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 
 
 @mock.patch.object(requests.Session, "delete")
 def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
-    openml.config.start_using_configuration_for_example()
     content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml"
     mock_delete.return_value = create_request_response(
         status_code=412,

@@ -1863,7 +1860,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
     ):
         openml.runs.delete_run(9_999_999)
 
-    run_url = "https://test.openml.org/api/v1/xml/run/9999999"
+    run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999"
     assert run_url == mock_delete.call_args.args[0]
     assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
 

@@ -1873,6 +1870,10 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
     Version(sklearn.__version__) < Version("0.21"),
     reason="couldn't perform local tests successfully w/o bloating RAM",
 )
+@unittest.skipIf(
+    Version(sklearn.__version__) >= Version("1.8"),
+    reason="predictions differ significantly",
+)
 @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
 @pytest.mark.test_server()
 def test__run_task_get_arffcontent_2(parallel_mock):
