diff --git a/openevolve/config.py b/openevolve/config.py
index 0c6219d04..86f65d765 100644
--- a/openevolve/config.py
+++ b/openevolve/config.py
@@ -340,6 +340,7 @@ class DatabaseConfig:
     artifact_size_threshold: int = 32 * 1024  # 32KB threshold
     cleanup_old_artifacts: bool = True
     artifact_retention_days: int = 30
+    max_snapshot_artifacts: Optional[int] = 100  # Max artifacts in worker snapshots (None=unlimited)
 
     novelty_llm: Optional["LLMInterface"] = None
     embedding_model: Optional[str] = None
diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
index 0306cfe21..2d65b6ce4 100644
--- a/openevolve/process_parallel.py
+++ b/openevolve/process_parallel.py
@@ -431,13 +431,18 @@ def _create_database_snapshot(self) -> Dict[str, Any]:
         }
 
         # Include artifacts for programs that might be selected
-        # IMPORTANT: This limits artifacts (execution outputs/errors) to first 100 programs only.
+        # This limits artifacts (execution outputs/errors) to avoid large snapshot sizes.
         # This does NOT affect program code - all programs are fully serialized above.
         # With max_artifact_bytes=20KB and population_size=1000, artifacts could be 20MB total,
-        # which would significantly slow worker process initialization. The limit of 100 keeps
-        # artifact data under 2MB while still providing execution context for recent programs.
+        # which would significantly slow worker process initialization. The default limit of 100
+        # keeps artifact data under 2MB while still providing execution context for recent programs.
         # Workers can still evolve properly as they have access to ALL program code.
-        for pid in list(self.database.programs.keys())[:100]:
+        # Configure via database.max_snapshot_artifacts (None for unlimited).
+        max_artifacts = self.database.config.max_snapshot_artifacts
+        program_ids = list(self.database.programs.keys())
+        if max_artifacts is not None:
+            program_ids = program_ids[:max_artifacts]
+        for pid in program_ids:
             artifacts = self.database.get_artifacts(pid)
             if artifacts:
                 snapshot["artifacts"][pid] = artifacts
diff --git a/tests/integration/test_examples_validation.py b/tests/integration/test_examples_validation.py
new file mode 100644
index 000000000..56002bada
--- /dev/null
+++ b/tests/integration/test_examples_validation.py
@@ -0,0 +1,375 @@
+"""
+Integration tests that validate existing examples work correctly.
+These tests verify that evaluators, configs, and initial programs are properly set up.
+"""
+
+import importlib.util
+import os
+import sys
+import tempfile
+import shutil
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+# Add project root to path
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from openevolve.config import Config, load_config
+from openevolve.evaluator import Evaluator
+
+
+class TestFunctionMinimizationExample(unittest.TestCase):
+    """Integration tests for the function_minimization example"""
+
+    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "function_minimization"
+
+    def test_config_loads(self):
+        """Test that the config file loads without errors"""
+        config_path = self.EXAMPLE_DIR / "config.yaml"
+        if not config_path.exists():
+            self.skipTest("function_minimization config not found")
+
+        config = load_config(str(config_path))
+        self.assertIsInstance(config, Config)
+        self.assertGreater(config.max_iterations, 0)
+
+    def test_initial_program_exists(self):
+        """Test that the initial program file exists"""
+        program_path = self.EXAMPLE_DIR / "initial_program.py"
+        self.assertTrue(program_path.exists(), "initial_program.py should exist")
+
+    def test_initial_program_has_evolve_block(self):
+        """Test that the initial program has EVOLVE-BLOCK markers"""
+        program_path = self.EXAMPLE_DIR / "initial_program.py"
+        if not program_path.exists():
+            self.skipTest("initial_program.py not found")
+
+        content = program_path.read_text()
+        self.assertIn("EVOLVE-BLOCK-START", content)
+        self.assertIn("EVOLVE-BLOCK-END", content)
+
+    def test_evaluator_exists(self):
+        """Test that the evaluator file exists"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        self.assertTrue(evaluator_path.exists(), "evaluator.py should exist")
+
+    def test_evaluator_has_evaluate_function(self):
+        """Test that the evaluator has an evaluate function"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        if not evaluator_path.exists():
+            self.skipTest("evaluator.py not found")
+
+        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
+        evaluator_module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(evaluator_module)
+
+        self.assertTrue(hasattr(evaluator_module, "evaluate"))
+        self.assertTrue(callable(evaluator_module.evaluate))
+
+    def test_evaluator_runs_on_initial_program(self):
+        """Test that the evaluator can evaluate the initial program"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        program_path = self.EXAMPLE_DIR / "initial_program.py"
+
+        if not evaluator_path.exists() or not program_path.exists():
+            self.skipTest("Example files not found")
+
+        # Load evaluator
+        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
+        evaluator_module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(evaluator_module)
+
+        # Run evaluation
+        result = evaluator_module.evaluate(str(program_path))
+
+        # Check result structure
+        if hasattr(result, 'metrics'):
+            # EvaluationResult object
+            metrics = result.metrics
+        else:
+            # Dictionary
+            metrics = result
+
+        self.assertIn("combined_score", metrics)
+        self.assertIsInstance(metrics["combined_score"], (int, float))
+
+
+class TestCirclePackingExample(unittest.TestCase):
+    """Integration tests for the circle_packing example"""
+
+    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "circle_packing"
+
+    def test_config_loads(self):
+        """Test that config files load without errors"""
+        for config_name in ["config_phase_1.yaml", "config_phase_2.yaml"]:
+            config_path = self.EXAMPLE_DIR / config_name
+            if config_path.exists():
+                config = load_config(str(config_path))
+                self.assertIsInstance(config, Config)
+
+    def test_evaluator_exists(self):
+        """Test that evaluator exists"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        self.assertTrue(evaluator_path.exists(), "evaluator.py should exist")
+
+    def test_evaluator_has_evaluate_function(self):
+        """Test that the evaluator has required functions"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        if not evaluator_path.exists():
+            self.skipTest("evaluator.py not found")
+
+        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
+        evaluator_module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(evaluator_module)
+
+        self.assertTrue(hasattr(evaluator_module, "evaluate"))
+
+
+class TestSignalProcessingExample(unittest.TestCase):
+    """Integration tests for the signal_processing example"""
+
+    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "signal_processing"
+
+    def test_config_loads(self):
+        """Test that the config file loads"""
+        config_path = self.EXAMPLE_DIR / "config.yaml"
+        if not config_path.exists():
+            self.skipTest("signal_processing config not found")
+
+        config = load_config(str(config_path))
+        self.assertIsInstance(config, Config)
+
+    def test_evaluator_exists(self):
+        """Test that evaluator exists"""
+        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
+        if not evaluator_path.exists():
+            self.skipTest("evaluator.py not found")
+        self.assertTrue(evaluator_path.exists())
+
+
+class TestEvaluatorIntegration(unittest.TestCase):
+    """Integration tests for the Evaluator class with real examples"""
+
+    def test_evaluator_loads_function_minimization(self):
+        """Test that Evaluator can load the function_minimization evaluator"""
+        evaluator_path = PROJECT_ROOT / "examples" / "function_minimization" / "evaluator.py"
+        if not evaluator_path.exists():
+            self.skipTest("function_minimization evaluator not found")
+
+        from openevolve.config import EvaluatorConfig
+        config = EvaluatorConfig(timeout=30, cascade_evaluation=True)
+
+        evaluator = Evaluator(config, str(evaluator_path))
+        self.assertIsNotNone(evaluator.evaluate_function)
+        self.assertTrue(callable(evaluator.evaluate_function))
+
+    def test_evaluator_module_has_cascade_functions(self):
+        """Test that function_minimization evaluator has cascade functions"""
+        evaluator_path = PROJECT_ROOT / "examples" / "function_minimization" / "evaluator.py"
+        if not evaluator_path.exists():
+            self.skipTest("function_minimization evaluator not found")
+
+        # Load the module directly to check for cascade functions
+        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+
+        # function_minimization has evaluate_stage1 and evaluate_stage2
+        self.assertTrue(hasattr(module, "evaluate_stage1"))
+        self.assertTrue(hasattr(module, "evaluate_stage2"))
+        self.assertTrue(callable(module.evaluate_stage1))
+        self.assertTrue(callable(module.evaluate_stage2))
+
+
+class TestConfigIntegration(unittest.TestCase):
+    """Integration tests for config loading across examples"""
+
+    @patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"})
+    def test_all_example_configs_load(self):
+        """Test that all example config files can be loaded"""
+        examples_dir = PROJECT_ROOT / "examples"
+        failed_configs = []
+
+        for config_path in examples_dir.rglob("*config*.yaml"):
+            try:
+                config = load_config(str(config_path))
+                self.assertIsInstance(config, Config)
+            except Exception as e:
+                failed_configs.append((str(config_path), str(e)))
+
+        if failed_configs:
+            failure_msg = "\n".join([f"{path}: {error}" for path, error in failed_configs])
+            self.fail(f"Failed to load configs:\n{failure_msg}")
+
+    def test_config_has_required_sections(self):
+        """Test that loaded configs have required sections"""
+        config_path = PROJECT_ROOT / "examples" / "function_minimization" / "config.yaml"
+        if not config_path.exists():
+            self.skipTest("function_minimization config not found")
+
+        config = load_config(str(config_path))
+
+        # Check required sections
+        self.assertIsNotNone(config.llm)
+        self.assertIsNotNone(config.database)
+        self.assertIsNotNone(config.evaluator)
+        self.assertIsNotNone(config.prompt)
+
+
+class TestEndToEndWithMockedLLM(unittest.TestCase):
+    """End-to-end tests with mocked LLM responses"""
+
+    def setUp(self):
+        """Set up test fixtures"""
+        self.temp_dir = tempfile.mkdtemp()
+        self.example_dir = PROJECT_ROOT / "examples" / "function_minimization"
+
+    def tearDown(self):
+        """Clean up"""
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+    def test_database_stores_and_retrieves_programs(self):
+        """Test that the database can store and retrieve programs"""
+        from openevolve.database import ProgramDatabase, Program, DatabaseConfig
+
+        config = DatabaseConfig(population_size=100)
+        db = ProgramDatabase(config)
+
+        # Add a program
+        program = Program(
+            id="test_prog_1",
+            code="def test(): return 42",
+            generation=0,
+            metrics={"combined_score": 0.5},
+        )
+        db.add(program)
+
+        # Retrieve it
+        retrieved = db.programs.get("test_prog_1")
+        self.assertIsNotNone(retrieved)
+        self.assertEqual(retrieved.code, "def test(): return 42")
+
+    def test_program_evolution_tracking(self):
+        """Test that program generations are tracked correctly"""
+        from openevolve.database import ProgramDatabase, Program, DatabaseConfig
+
+        config = DatabaseConfig(population_size=100)
+        db = ProgramDatabase(config)
+
+        # Add parent program
+        parent = Program(
+            id="parent_1",
+            code="def test(): return 1",
+            generation=0,
+            metrics={"combined_score": 0.3},
+        )
+        db.add(parent)
+
+        # Add child program
+        child = Program(
+            id="child_1",
+            code="def test(): return 2",
+            generation=1,
+            parent_id="parent_1",
+            metrics={"combined_score": 0.5},
+        )
+        db.add(child)
+
+        # Verify relationships
+        self.assertEqual(db.programs["child_1"].parent_id, "parent_1")
+        self.assertEqual(db.programs["child_1"].generation, 1)
+
+    def test_evaluator_returns_evaluation_result(self):
+        """Test that evaluators return proper EvaluationResult objects"""
+        from openevolve.evaluation_result import EvaluationResult
+
+        evaluator_path = self.example_dir / "evaluator.py"
+        program_path = self.example_dir / "initial_program.py"
+
+        if not evaluator_path.exists() or not program_path.exists():
+            self.skipTest("Example files not found")
+
+        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
+        evaluator_module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(evaluator_module)
+
+        result = evaluator_module.evaluate(str(program_path))
+
+        # Should be an EvaluationResult or dict with metrics
+        if isinstance(result, EvaluationResult):
+            self.assertIn("combined_score", result.metrics)
+        else:
+            self.assertIn("combined_score", result)
+
+
+class TestExampleStructure(unittest.TestCase):
+    """Tests to verify example directory structure is correct"""
+
+    def test_examples_have_required_files(self):
+        """Test that examples have the minimum required files"""
+        examples_dir = PROJECT_ROOT / "examples"
+
+        # These examples should have at least a config and evaluator
+        required_examples = [
+            "function_minimization",
+            "circle_packing",
+        ]
+
+        for example_name in required_examples:
+            example_dir = examples_dir / example_name
+            if not example_dir.exists():
+                continue
+
+            # Check for config
+            config_files = list(example_dir.glob("*config*.yaml"))
+            self.assertGreater(
+                len(config_files), 0,
+                f"{example_name} should have at least one config file"
+            )
+
+            # Check for evaluator
+            evaluator_path = example_dir / "evaluator.py"
+            self.assertTrue(
+                evaluator_path.exists(),
+                f"{example_name} should have evaluator.py"
+            )
+
+    def test_evaluators_are_importable(self):
+        """Test that all evaluators can be imported without errors"""
+        examples_dir = PROJECT_ROOT / "examples"
+        failed_imports = []
+
+        for evaluator_path in examples_dir.rglob("evaluator.py"):
+            try:
+                spec = importlib.util.spec_from_file_location(
+                    f"evaluator_{evaluator_path.parent.name}",
+                    evaluator_path
+                )
+                module = importlib.util.module_from_spec(spec)
+                spec.loader.exec_module(module)
+
+                # Verify evaluate function exists
+                if not hasattr(module, "evaluate"):
+                    failed_imports.append(
+                        (str(evaluator_path), "Missing evaluate function")
+                    )
+            except Exception as e:
+                failed_imports.append((str(evaluator_path), str(e)))
+
+        if failed_imports:
+            # Only fail if critical examples fail
+            critical_failures = [
+                f for f in failed_imports
+                if "function_minimization" in f[0] or "circle_packing" in f[0]
+            ]
+            if critical_failures:
+                failure_msg = "\n".join(
+                    [f"{path}: {error}" for path, error in critical_failures]
+                )
+                self.fail(f"Critical evaluators failed to import:\n{failure_msg}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_changes_description.py b/tests/test_changes_description.py
new file mode 100644
index 000000000..b9e0a7b46
--- /dev/null
+++ b/tests/test_changes_description.py
@@ -0,0 +1,153 @@
+"""
+Tests for large codebase support via changes description.
+Programs can be represented as compact change descriptions instead of full code.
+"""
+
+import unittest
+
+from openevolve.config import Config, PromptConfig
+
+
+class TestChangesDescriptionConfigDefaults(unittest.TestCase):
+    """Tests for changes description configuration defaults"""
+
+    def test_programs_as_changes_description_default_false(self):
+        """Test that programs_as_changes_description defaults to False"""
+        config = Config()
+        self.assertFalse(config.prompt.programs_as_changes_description)
+
+    def test_system_message_changes_description_default_none(self):
+        """Test that system_message_changes_description defaults to None"""
+        config = Config()
+        self.assertIsNone(config.prompt.system_message_changes_description)
+
+    def test_initial_changes_description_default_empty(self):
+        """Test that initial_changes_description defaults to empty string"""
+        config = Config()
+        self.assertEqual(config.prompt.initial_changes_description, "")
+
+
+class TestChangesDescriptionValidation(unittest.TestCase):
+    """Tests for changes description validation rules"""
+
+    def test_requires_diff_based_evolution(self):
+        """Test that programs_as_changes_description requires diff_based_evolution"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": False,
+            "prompt": {
+                "programs_as_changes_description": True,
+            }
+        }
+        with self.assertRaises(ValueError) as context:
+            Config.from_dict(config_dict)
+        self.assertIn("diff_based_evolution", str(context.exception))
+
+    def test_works_with_diff_based_evolution_enabled(self):
+        """Test that changes description works when diff_based_evolution=True"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": True,
+            "prompt": {
+                "programs_as_changes_description": True,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertTrue(config.prompt.programs_as_changes_description)
+        self.assertTrue(config.diff_based_evolution)
+
+    def test_disabled_without_diff_based_evolution_is_ok(self):
+        """Test that disabled changes description works without diff_based_evolution"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": False,
+            "prompt": {
+                "programs_as_changes_description": False,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertFalse(config.prompt.programs_as_changes_description)
+
+
+class TestChangesDescriptionFromDict(unittest.TestCase):
+    """Tests for loading changes description config from dict"""
+
+    def test_custom_system_message(self):
+        """Test setting custom system_message_changes_description"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": True,
+            "prompt": {
+                "programs_as_changes_description": True,
+                "system_message_changes_description": "You are optimizing a large codebase.",
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(
+            config.prompt.system_message_changes_description,
+            "You are optimizing a large codebase."
+        )
+
+    def test_custom_initial_description(self):
+        """Test setting custom initial_changes_description"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": True,
+            "prompt": {
+                "programs_as_changes_description": True,
+                "initial_changes_description": "Initial implementation with basic algorithm.",
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(
+            config.prompt.initial_changes_description,
+            "Initial implementation with basic algorithm."
+        )
+
+    def test_all_changes_description_options(self):
+        """Test setting all changes description options together"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "diff_based_evolution": True,
+            "prompt": {
+                "programs_as_changes_description": True,
+                "system_message_changes_description": "Custom system message",
+                "initial_changes_description": "Initial state description",
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertTrue(config.prompt.programs_as_changes_description)
+        self.assertEqual(
+            config.prompt.system_message_changes_description,
+            "Custom system message"
+        )
+        self.assertEqual(
+            config.prompt.initial_changes_description,
+            "Initial state description"
+        )
+
+
+class TestPromptConfigChangesDescription(unittest.TestCase):
+    """Tests for PromptConfig changes description fields"""
+
+    def test_prompt_config_defaults(self):
+        """Test PromptConfig defaults for changes description"""
+        prompt_config = PromptConfig()
+        self.assertFalse(prompt_config.programs_as_changes_description)
+        self.assertIsNone(prompt_config.system_message_changes_description)
+        self.assertEqual(prompt_config.initial_changes_description, "")
+
+    def test_prompt_config_custom_values(self):
+        """Test PromptConfig with custom changes description values"""
+        prompt_config = PromptConfig(
+            programs_as_changes_description=True,
+            system_message_changes_description="Custom message",
+            initial_changes_description="Initial state",
+        )
+        self.assertTrue(prompt_config.programs_as_changes_description)
+        self.assertEqual(prompt_config.system_message_changes_description, "Custom message")
+        self.assertEqual(prompt_config.initial_changes_description, "Initial state")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_early_stopping_config.py b/tests/test_early_stopping_config.py
new file mode 100644
index 000000000..4195c42bf
--- /dev/null
+++ b/tests/test_early_stopping_config.py
@@ -0,0 +1,106 @@
+"""
+Tests for early stopping configuration and behavior.
+"""
+
+import unittest
+
+from openevolve.config import Config
+
+
+class TestEarlyStoppingConfigDefaults(unittest.TestCase):
+    """Tests for early stopping configuration defaults"""
+
+    def test_patience_default_is_none(self):
+        """Test that early_stopping_patience defaults to None (disabled)"""
+        config = Config()
+        self.assertIsNone(config.early_stopping_patience)
+
+    def test_convergence_threshold_default(self):
+        """Test that convergence_threshold defaults to 0.001"""
+        config = Config()
+        self.assertEqual(config.convergence_threshold, 0.001)
+
+    def test_metric_default(self):
+        """Test that early_stopping_metric defaults to combined_score"""
+        config = Config()
+        self.assertEqual(config.early_stopping_metric, "combined_score")
+
+
+class TestEarlyStoppingConfigFromDict(unittest.TestCase):
+    """Tests for loading early stopping config from dict"""
+
+    def test_custom_patience(self):
+        """Test setting custom early_stopping_patience"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "early_stopping_patience": 50,
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.early_stopping_patience, 50)
+
+    def test_custom_convergence_threshold(self):
+        """Test setting custom convergence_threshold"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "convergence_threshold": 0.01,
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.convergence_threshold, 0.01)
+
+    def test_custom_metric(self):
+        """Test setting custom early_stopping_metric"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "early_stopping_metric": "score",
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.early_stopping_metric, "score")
+
+    def test_all_early_stopping_options(self):
+        """Test setting all early stopping options together"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "early_stopping_patience": 100,
+            "convergence_threshold": 0.005,
+            "early_stopping_metric": "validity",
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.early_stopping_patience, 100)
+        self.assertEqual(config.convergence_threshold, 0.005)
+        self.assertEqual(config.early_stopping_metric, "validity")
+
+    def test_zero_patience_disables_early_stopping(self):
+        """Test that patience=0 effectively disables early stopping"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "early_stopping_patience": 0,
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.early_stopping_patience, 0)
+
+    def test_negative_patience_allowed(self):
+        """Test that negative patience is allowed (but probably shouldn't be used)"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "early_stopping_patience": -1,
+        }
+        # Should not raise an error during loading
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.early_stopping_patience, -1)
+
+
+class TestEarlyStoppingWithYaml(unittest.TestCase):
+    """Tests for early stopping config from YAML"""
+
+    def test_config_to_dict_includes_early_stopping(self):
+        """Test that to_dict includes early stopping settings"""
+        config = Config()
+        config_dict = config.to_dict()
+
+        self.assertIn("early_stopping_patience", config_dict)
+        self.assertIn("convergence_threshold", config_dict)
+        self.assertIn("early_stopping_metric", config_dict)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_llm_config_optional_params.py b/tests/test_llm_config_optional_params.py
new file mode 100644
index 000000000..1011774ce
--- /dev/null
+++ b/tests/test_llm_config_optional_params.py
@@ -0,0 +1,127 @@
+"""
+Tests for optional LLM parameters (temperature, top_p).
+Ensures Anthropic model compatibility where both params cannot be specified together.
+"""
+
+import unittest
+
+from openevolve.config import Config, LLMConfig, LLMModelConfig
+
+
+class TestOptionalTemperatureTopP(unittest.TestCase):
+    """Tests for optional temperature and top_p parameters"""
+
+    def test_llm_config_temperature_default(self):
+        """Test that temperature defaults to 0.7 in LLMConfig"""
+        config = LLMConfig()
+        self.assertEqual(config.temperature, 0.7)
+
+    def test_llm_config_top_p_default_is_none(self):
+        """Test that top_p defaults to None in LLMConfig (for Anthropic compatibility)"""
+        config = LLMConfig()
+        self.assertIsNone(config.top_p)
+
+    def test_model_config_temperature_none_by_default(self):
+        """Test that LLMModelConfig temperature is None by default"""
+        config = LLMModelConfig()
+        self.assertIsNone(config.temperature)
+
+    def test_model_config_top_p_none_by_default(self):
+        """Test that LLMModelConfig top_p is None by default"""
+        config = LLMModelConfig()
+        self.assertIsNone(config.top_p)
+
+    def test_type_annotation_allows_none(self):
+        """Test that temperature and top_p can be set to None"""
+        config = LLMModelConfig(temperature=None, top_p=None)
+        self.assertIsNone(config.temperature)
+        self.assertIsNone(config.top_p)
+
+    def test_type_annotation_allows_float(self):
+        """Test that temperature and top_p can be set to float values"""
+        config = LLMModelConfig(temperature=0.5, top_p=0.9)
+        self.assertEqual(config.temperature, 0.5)
+        self.assertEqual(config.top_p, 0.9)
+
+
+class TestConfigFromDictWithOptionalParams(unittest.TestCase):
+    """Tests for loading config with optional temperature/top_p from dict"""
+
+    def test_config_with_null_temperature_uses_default(self):
+        """Test loading config with null temperature uses default"""
+        config_dict = {
+            "llm": {
+                "primary_model": "claude-sonnet",
+                "api_base": "https://api.anthropic.com/v1",
+                "temperature": None,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        # None is stripped, so default 0.7 is used
+        self.assertEqual(config.llm.temperature, 0.7)
+
+    def test_config_with_null_top_p(self):
+        """Test loading config with null top_p"""
+        config_dict = {
+            "llm": {
+                "primary_model": "gpt-4",
+                "top_p": None,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertIsNone(config.llm.top_p)
+
+    def test_config_with_only_temperature(self):
+        """Test config with only temperature set (typical for Anthropic)"""
+        config_dict = {
+            "llm": {
+                "primary_model": "claude-sonnet",
+                "temperature": 0.9,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.llm.temperature, 0.9)
+        self.assertIsNone(config.llm.top_p)
+
+    def test_config_with_only_top_p(self):
+        """Test config with only top_p set"""
+        config_dict = {
+            "llm": {
+                "primary_model": "gpt-4",
+                "temperature": None,
+                "top_p": 0.95,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.llm.top_p, 0.95)
+
+    def test_config_with_both_params(self):
+        """Test config with both temperature and top_p set (OpenAI compatible)"""
+        config_dict = {
+            "llm": {
+                "primary_model": "gpt-4",
+                "temperature": 0.8,
+                "top_p": 0.9,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.llm.temperature, 0.8)
+        self.assertEqual(config.llm.top_p, 0.9)
+
+    def test_models_inherit_optional_params(self):
+        """Test that models inherit temperature/top_p from parent config"""
+        config_dict = {
+            "llm": {
+                "primary_model": "gpt-4",
+                "temperature": 0.5,
+                "top_p": None,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        # Check that models inherited the temperature
+        for model in config.llm.models:
+            self.assertEqual(model.temperature, 0.5)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_snapshot_artifacts_limit.py b/tests/test_snapshot_artifacts_limit.py
new file mode 100644
index 000000000..9ca57d6d9
--- /dev/null
+++ b/tests/test_snapshot_artifacts_limit.py
@@ -0,0 +1,141 @@
+"""
+Tests for configurable max_snapshot_artifacts limit.
+Controls how many artifacts are included in worker process snapshots.
+"""
+
+import unittest
+
+from openevolve.config import Config, DatabaseConfig
+from openevolve.database import ProgramDatabase, Program
+
+
+class TestMaxSnapshotArtifactsConfig(unittest.TestCase):
+    """Tests for max_snapshot_artifacts configuration"""
+
+    def test_default_value_is_100(self):
+        """Test that max_snapshot_artifacts defaults to 100"""
+        config = Config()
+        self.assertEqual(config.database.max_snapshot_artifacts, 100)
+
+    def test_database_config_default(self):
+        """Test DatabaseConfig default for max_snapshot_artifacts"""
+        db_config = DatabaseConfig()
+        self.assertEqual(db_config.max_snapshot_artifacts, 100)
+
+    def test_custom_value_from_dict(self):
+        """Test loading custom max_snapshot_artifacts from config dict"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "database": {
+                "max_snapshot_artifacts": 500,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.database.max_snapshot_artifacts, 500)
+
+    def test_unlimited_artifacts_with_none(self):
+        """Test setting unlimited artifacts with None"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "database": {
+                "max_snapshot_artifacts": None,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertIsNone(config.database.max_snapshot_artifacts)
+
+    def test_zero_artifacts(self):
+        """Test setting max_snapshot_artifacts to 0"""
+        config_dict = {
+            "llm": {"primary_model": "gpt-4"},
+            "database": {
+                "max_snapshot_artifacts": 0,
+            }
+        }
+        config = Config.from_dict(config_dict)
+        self.assertEqual(config.database.max_snapshot_artifacts, 0)
+
+
+class TestArtifactStorageWithLimit(unittest.TestCase):
+    """Tests for artifact storage respecting the limit"""
+
+    def test_store_artifacts_within_limit(self):
+        """Test storing artifacts when within the limit"""
+        db_config = DatabaseConfig(max_snapshot_artifacts=5)
+        db = ProgramDatabase(db_config)
+
+        # Add programs with artifacts
+        for i in range(3):
+            program = Program(
+                id=f"prog_{i}",
+                code=f"def func_{i}(): pass",
+                generation=0,
+                metrics={"score": i * 0.1},
+            )
+            db.add(program)
+            db.store_artifacts(f"prog_{i}", {"output": f"result_{i}"})
+
+        # All artifacts should be retrievable
+        for i in range(3):
+            artifacts = db.get_artifacts(f"prog_{i}")
+            self.assertEqual(artifacts.get("output"), f"result_{i}")
+
+    def test_store_many_artifacts(self):
+        """Test storing more artifacts than the limit"""
+        db_config = DatabaseConfig(max_snapshot_artifacts=5)
+        db = ProgramDatabase(db_config)
+
+        # Add 10 programs with artifacts
+        for i in range(10):
+            program = Program(
+                id=f"prog_{i}",
+                code=f"def func_{i}(): pass",
+                generation=0,
+                metrics={"score": i * 0.1},
+            )
+            db.add(program)
+            db.store_artifacts(f"prog_{i}", {"output": f"result_{i}"})
+
+        # All artifacts should still be stored in the database
+        # (the limit only affects snapshots, not storage)
+        for i in range(10):
+            artifacts = db.get_artifacts(f"prog_{i}")
+            self.assertEqual(artifacts.get("output"), f"result_{i}")
+
+    def test_artifacts_for_nonexistent_program_returns_empty(self):
+        """Test retrieving artifacts for non-existent program"""
+        db_config = DatabaseConfig()
+        db = ProgramDatabase(db_config)
+
+        artifacts = db.get_artifacts("nonexistent_id")
+        self.assertEqual(artifacts, {})
+
+    def test_store_artifacts_for_nonexistent_program_logs_warning(self):
+        """Test that storing artifacts for non-existent program doesn't crash"""
+        db_config = DatabaseConfig()
+        db = ProgramDatabase(db_config)
+
+        # Should not raise an error
+        db.store_artifacts("nonexistent", {"output": "test"})
+
+
+class TestSnapshotCreation(unittest.TestCase):
+    """Tests for snapshot creation with artifact limits"""
+
+    def test_config_accessible_from_database(self):
+        """Test that max_snapshot_artifacts is accessible from database config"""
+        db_config = DatabaseConfig(max_snapshot_artifacts=50)
+        db = ProgramDatabase(db_config)
+
+        self.assertEqual(db.config.max_snapshot_artifacts, 50)
+
+    def test_unlimited_config_is_none(self):
+        """Test that unlimited artifacts config is None"""
+        db_config = DatabaseConfig(max_snapshot_artifacts=None)
+        db = ProgramDatabase(db_config)
+
+        self.assertIsNone(db.config.max_snapshot_artifacts)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_visualization_sanitization.py b/tests/test_visualization_sanitization.py
new file mode 100644
index 000000000..48a04a053
--- /dev/null
+++ b/tests/test_visualization_sanitization.py
@@ -0,0 +1,257 @@
+"""
+Tests for visualization data sanitization.
+Ensures -inf, +inf, and NaN values are properly sanitized for JSON serialization.
+"""
+
+import json
+import math
+import os
+import sys
+import unittest
+
+
+class TestCheckJsonFloat(unittest.TestCase):
+    """Tests for the check_json_float helper function"""
+
+    @classmethod
+    def setUpClass(cls):
+        """Add scripts directory to path for importing visualizer"""
+        cls.scripts_path = os.path.join(
+            os.path.dirname(__file__), "..", "scripts"
+        )
+        sys.path.insert(0, cls.scripts_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        """Remove scripts directory from path"""
+        if cls.scripts_path in sys.path:
+            sys.path.remove(cls.scripts_path)
+
+    def test_valid_positive_float(self):
+        """Test that positive floats are valid"""
+        try:
+            from visualizer import check_json_float
+            self.assertTrue(check_json_float(1.5))
+            self.assertTrue(check_json_float(100.0))
+            self.assertTrue(check_json_float(0.001))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_valid_negative_float(self):
+        """Test that negative floats are valid"""
+        try:
+            from visualizer import check_json_float
+            self.assertTrue(check_json_float(-1.5))
+            self.assertTrue(check_json_float(-100.0))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_valid_zero(self):
+        """Test that zero is valid"""
+        try:
+            from visualizer import check_json_float
+            self.assertTrue(check_json_float(0))
+            self.assertTrue(check_json_float(0.0))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_valid_integer(self):
+        """Test that integers are valid"""
+        try:
+            from visualizer import check_json_float
+            self.assertTrue(check_json_float(1))
+            self.assertTrue(check_json_float(-5))
+            self.assertTrue(check_json_float(1000))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_invalid_positive_infinity(self):
+        """Test that positive infinity is invalid"""
+        try:
+            from visualizer import check_json_float
+            self.assertFalse(check_json_float(float('inf')))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_invalid_negative_infinity(self):
+        """Test that negative infinity is invalid"""
+        try:
+            from visualizer import check_json_float
+            self.assertFalse(check_json_float(float('-inf')))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_invalid_nan(self):
+        """Test that NaN is invalid"""
+        try:
+            from visualizer import check_json_float
+            self.assertFalse(check_json_float(float('nan')))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_invalid_none(self):
+        """Test that None is invalid (not a number)"""
+        try:
+            from visualizer import check_json_float
+            self.assertFalse(check_json_float(None))
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+
+class TestSanitizeProgramForVisualization(unittest.TestCase):
+    """Tests for the sanitize_program_for_visualization function"""
+
+    @classmethod
+    def setUpClass(cls):
+        """Add scripts directory to path for importing visualizer"""
+        cls.scripts_path = os.path.join(
+            os.path.dirname(__file__), "..", "scripts"
+        )
+        sys.path.insert(0, cls.scripts_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        """Remove scripts directory from path"""
+        if cls.scripts_path in sys.path:
+            sys.path.remove(cls.scripts_path)
+
+    def test_sanitize_negative_infinity_in_metrics(self):
+        """Test that -inf in metrics is sanitized to None"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {"combined_score": float('-inf')},
+                "metadata": {}
+            }
+            sanitize_program_for_visualization(program)
+            self.assertIsNone(program["metrics"]["combined_score"])
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_sanitize_positive_infinity_in_metrics(self):
+        """Test that +inf in metrics is sanitized to None"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {"score": float('inf')},
+                "metadata": {}
+            }
+            sanitize_program_for_visualization(program)
+            self.assertIsNone(program["metrics"]["score"])
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_sanitize_nan_in_metrics(self):
+        """Test that NaN in metrics is sanitized to None"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {"validity": float('nan')},
+                "metadata": {}
+            }
+            sanitize_program_for_visualization(program)
+            self.assertIsNone(program["metrics"]["validity"])
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_valid_metrics_unchanged(self):
+        """Test that valid metrics are not changed"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {"score": 0.85, "validity": 1.0},
+                "metadata": {}
+            }
+            sanitize_program_for_visualization(program)
+            self.assertEqual(program["metrics"]["score"], 0.85)
+            self.assertEqual(program["metrics"]["validity"], 1.0)
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_sanitize_parent_metrics(self):
+        """Test that parent_metrics are also sanitized"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {"score": 0.5},
+                "metadata": {
+                    "parent_metrics": {
+                        "score": float('inf'),
+                        "other": 1.0,
+                    }
+                }
+            }
+            sanitize_program_for_visualization(program)
+            self.assertIsNone(program["metadata"]["parent_metrics"]["score"])
+            self.assertEqual(program["metadata"]["parent_metrics"]["other"], 1.0)
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+    def test_mixed_valid_and_invalid_values(self):
+        """Test sanitization with mix of valid and invalid values"""
+        try:
+            from visualizer import sanitize_program_for_visualization
+
+            program = {
+                "metrics": {
+                    "score": 0.5,
+                    "combined_score": float('-inf'),
+                    "validity": float('nan'),
+                    "eval_time": 1.23,
+                },
+                "metadata": {}
+            }
+            sanitize_program_for_visualization(program)
+
+            self.assertEqual(program["metrics"]["score"], 0.5)
+            self.assertIsNone(program["metrics"]["combined_score"])
+            self.assertIsNone(program["metrics"]["validity"])
+            self.assertEqual(program["metrics"]["eval_time"], 1.23)
+        except ImportError:
+            self.skipTest("Visualizer module not available")
+
+
+class TestJsonSerialization(unittest.TestCase):
+    """Tests for JSON serialization of sanitized data"""
+
+    def test_sanitized_data_is_json_serializable(self):
+        """Test that sanitized data can be JSON serialized"""
+        data = {
+            "metrics": {
+                "score": None,  # Sanitized from -inf
+                "other": 1.0,
+            }
+        }
+        # This should not raise an error
+        json_str = json.dumps(data)
+        self.assertIn('"score": null', json_str)
+
+    def test_unsanitized_infinity_fails_strict_json(self):
+        """Test that unsanitized infinity fails strict JSON serialization"""
+        data = {"score": float('inf')}
+        # With allow_nan=False (strict JSON compliance), this should fail
+        with self.assertRaises(ValueError):
+            json.dumps(data, allow_nan=False)
+
+    def test_unsanitized_nan_fails_strict_json(self):
+        """Test that unsanitized NaN fails strict JSON serialization"""
+        data = {"score": float('nan')}
+        # With allow_nan=False (strict JSON compliance), this should fail
+        with self.assertRaises(ValueError):
+            json.dumps(data, allow_nan=False)
+
+    def test_sanitized_none_works_with_strict_json(self):
+        """Test that sanitized None values work with strict JSON"""
+        data = {"score": None}  # Properly sanitized
+        # Should work with strict JSON
+        json_str = json.dumps(data, allow_nan=False)
+        self.assertIn('"score": null', json_str)
+
+
+if __name__ == "__main__":
+    unittest.main()
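
Usage note (illustrative sketch, not part of the diff): since Config.from_dict reads the new limit from a "database" section (as the tests above do), the same setting should be expressible in a user's config YAML. The surrounding keys below are only an example; only max_snapshot_artifacts is introduced by this change, and the null-means-unlimited mapping assumes standard YAML null-to-None loading.

# config.yaml (sketch)
database:
  population_size: 1000
  max_snapshot_artifacts: 250   # cap artifacts copied into worker snapshots; null = unlimited, 0 = none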