@@ -527,27 +527,20 @@ def test_deletion_of_cache_dir(self):
def test_deletion_of_cache_dir_faulty_download(self, patch):
    """A download that blows up mid-way must not leave partial entries in the dataset cache."""
    # Make the patched download helper fail unconditionally.
    patch.side_effect = Exception("Boom!")
    self.assertRaisesRegex(
        Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1
    )
    # After the failed fetch the datasets cache directory must be empty.
    cache_root = openml.config.get_cache_directory()
    datasets_cache_dir = os.path.join(cache_root, "datasets")
    assert not os.listdir(datasets_cache_dir)
532532
533533 @pytest .mark .test_server ()
534534 def test_publish_dataset (self ):
535- # lazy loading not possible as we need the arff-file.
536- openml .datasets .get_dataset (3 , download_data = True )
537- file_path = os .path .join (
538- openml .config .get_cache_directory (),
539- "datasets" ,
540- "3" ,
541- "dataset.arff" ,
542- )
535+ arff_file_path = self .static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
543536 dataset = OpenMLDataset (
544537 "anneal" ,
545538 "test" ,
546539 data_format = "arff" ,
547540 version = 1 ,
548541 licence = "public" ,
549542 default_target_attribute = "class" ,
550- data_file = file_path ,
543+ data_file = arff_file_path ,
551544 )
552545 dataset .publish ()
553546 TestBase ._mark_entity_for_removal ("data" , dataset .dataset_id )
@@ -890,7 +883,7 @@ def test_create_invalid_dataset(self):
890883
891884 @pytest .mark .test_server ()
892885 def test_get_online_dataset_arff (self ):
893- dataset_id = 100 # Australian
886+ dataset_id = 128 # iris -- one of the few datasets without parquet file
894887 # lazy loading not used as arff file is checked.
895888 dataset = openml .datasets .get_dataset (dataset_id , download_data = True )
896889 decoder = arff .ArffDecoder ()
@@ -1468,8 +1461,9 @@ def test_data_edit_critical_field(self):
14681461 raise e
14691462 time .sleep (10 )
14701463 # Delete the cache dir to get the newer version of the dataset
1464+
14711465 shutil .rmtree (
1472- os .path .join (self . workdir , "org" , " openml" , "test" , "datasets" , str (did )),
1466+ os .path .join (openml . config . get_cache_directory () , "datasets" , str (did )),
14731467 )
14741468
14751469 @pytest .mark .test_server ()
@@ -1734,7 +1728,6 @@ def test_delete_dataset(self):
17341728
17351729@mock .patch .object (requests .Session , "delete" )
17361730def test_delete_dataset_not_owned (mock_delete , test_files_directory , test_api_key ):
1737- openml .config .start_using_configuration_for_example ()
17381731 content_file = (
17391732 test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
17401733 )
@@ -1749,14 +1742,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
17491742 ):
17501743 openml .datasets .delete_dataset (40_000 )
17511744
1752- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1745+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
17531746 assert dataset_url == mock_delete .call_args .args [0 ]
17541747 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
17551748
17561749
17571750@mock .patch .object (requests .Session , "delete" )
17581751def test_delete_dataset_with_run (mock_delete , test_files_directory , test_api_key ):
1759- openml .config .start_using_configuration_for_example ()
17601752 content_file = (
17611753 test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
17621754 )
@@ -1771,14 +1763,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
17711763 ):
17721764 openml .datasets .delete_dataset (40_000 )
17731765
1774- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1766+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
17751767 assert dataset_url == mock_delete .call_args .args [0 ]
17761768 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
17771769
17781770
17791771@mock .patch .object (requests .Session , "delete" )
17801772def test_delete_dataset_success (mock_delete , test_files_directory , test_api_key ):
1781- openml .config .start_using_configuration_for_example ()
17821773 content_file = (
17831774 test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
17841775 )
@@ -1790,14 +1781,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
17901781 success = openml .datasets .delete_dataset (40000 )
17911782 assert success
17921783
1793- dataset_url = "https://test. openml.org /api/v1/xml/data/40000"
1784+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/40000"
17941785 assert dataset_url == mock_delete .call_args .args [0 ]
17951786 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
17961787
17971788
17981789@mock .patch .object (requests .Session , "delete" )
17991790def test_delete_unknown_dataset (mock_delete , test_files_directory , test_api_key ):
1800- openml .config .start_using_configuration_for_example ()
18011791 content_file = (
18021792 test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
18031793 )
@@ -1812,7 +1802,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
18121802 ):
18131803 openml .datasets .delete_dataset (9_999_999 )
18141804
1815- dataset_url = "https://test. openml.org /api/v1/xml/data/9999999"
1805+ dataset_url = f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/9999999"
18161806 assert dataset_url == mock_delete .call_args .args [0 ]
18171807 assert test_api_key == mock_delete .call_args .kwargs .get ("params" , {}).get ("api_key" )
18181808
@@ -1907,9 +1897,8 @@ def _dataset_features_is_downloaded(did: int):
19071897
19081898
def _dataset_data_file_is_downloaded(did: int) -> bool:
    """Return True iff a data file (parquet ``.pq`` or ``.arff``) for dataset ``did`` is cached.

    The per-dataset cache directory may not exist at all when nothing was ever
    downloaded for this id; ``Path.iterdir`` would raise ``FileNotFoundError``
    in that case, so guard explicitly and report "not downloaded" instead.
    """
    cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
    if not cache_directory.is_dir():
        # No cache dir for this dataset id => certainly nothing downloaded.
        return False
    return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())
19131902
19141903
19151904def _assert_datasets_retrieved_successfully (
@@ -2014,7 +2003,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
20142003 test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
20152004 )
20162005 # While the mocked example is from production, unit tests by default connect to the test server.
2017- requests_mock .get ("https://test. openml.org /api/v1/xml/data/61" , text = content_file .read_text ())
2006+ requests_mock .get (f" { openml .config . TEST_SERVER_URL } /api/v1/xml/data/61" , text = content_file .read_text ())
20182007 dataset = openml .datasets .get_dataset (61 , download_data = True )
20192008 assert dataset ._parquet_url is not None
20202009 assert dataset .parquet_file is not None
0 commit comments