From c925fe1fdb23b90fa00e290a69f41ba47e6c498e Mon Sep 17 00:00:00 2001 From: bluebread Date: Fri, 10 Oct 2025 04:34:30 +0000 Subject: [PATCH 1/3] Add novelty-based rejection sampling to prevent duplicate programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces a novelty checking system that uses embeddings and LLM-based judging to ensure programs added to the database are meaningfully different from existing ones in their island. Changes: - Added EmbeddingClient class for computing code embeddings using OpenAI/Azure models - Added novelty judge prompts for LLM-based similarity assessment - Modified ProgramDatabase to support novelty checking before adding programs - Added embedding vector field to Program dataclass - Added configuration options for embedding model, novelty LLM, and similarity threshold - Integrated novelty LLM ensemble in controller setup - Updated example config with novelty checking parameters The novelty check uses cosine similarity of embeddings as a first pass, then uses an LLM judge to determine if programs exceeding the similarity threshold are truly novel or just trivial variations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- examples/function_minimization/config.yaml | 3 + openevolve/config.py | 8 +- openevolve/controller.py | 1 + openevolve/database.py | 129 +++++++++++++++++++++ openevolve/embedding.py | 91 +++++++++++++++ openevolve/novelty_judge.py | 40 +++++++ 6 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 openevolve/embedding.py create mode 100644 openevolve/novelty_judge.py diff --git a/examples/function_minimization/config.yaml b/examples/function_minimization/config.yaml index 36baa086a..13ff18468 100644 --- a/examples/function_minimization/config.yaml +++ b/examples/function_minimization/config.yaml @@ -28,6 +28,9 @@ database: elite_selection_ratio: 0.2 exploitation_ratio: 0.7 + embedding_model: "text-embedding-3-small" + similarity_threshold: 0.99 + # Evaluator configuration evaluator: timeout: 60 diff --git a/openevolve/config.py b/openevolve/config.py index affc1e25d..57fcaa46e 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -5,10 +5,13 @@ import os from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union import yaml +if TYPE_CHECKING: + from openevolve.llm.base import LLMInterface + @dataclass class LLMModelConfig: @@ -283,6 +286,9 @@ class DatabaseConfig: cleanup_old_artifacts: bool = True artifact_retention_days: int = 30 + novelty_llm: Optional["LLMInterface"] = None + embedding_model: Optional[str] = None + similarity_threshold: float = 0.99 @dataclass class EvaluatorConfig: diff --git a/openevolve/controller.py b/openevolve/controller.py index 521659a9b..4f27f0437 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -154,6 +154,7 @@ def __init__( if self.config.random_seed is not None: self.config.database.random_seed = self.config.random_seed + self.config.database.novelty_llm = self.llm_ensemble self.database = ProgramDatabase(self.config.database) self.evaluator = Evaluator( diff --git a/openevolve/database.py b/openevolve/database.py index 9003198c1..e472afb2a 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -72,6 +72,9 @@ class Program: artifacts_json: Optional[str] = None # JSON-serialized small artifacts 
     artifact_dir: Optional[str] = None  # Path to large artifact files
 
+    # Embedding vector for novelty rejection sampling
+    embedding: Optional[List[float]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation"""
         return asdict(self)
@@ -183,6 +186,13 @@ def __init__(self, config: DatabaseConfig):
         }
 
         logger.info(f"Initialized program database with {len(self.programs)} programs")
+
+        # Novelty judge setup
+        from openevolve.embedding import EmbeddingClient
+        self.novelty_llm = config.novelty_llm
+        self.embedding_client = EmbeddingClient(config.embedding_model) if config.embedding_model else None
+        self.similarity_threshold = config.similarity_threshold
+
     def add(
         self, program: Program, iteration: int = None, target_island: Optional[int] = None
@@ -240,6 +250,11 @@ def add(
         island_idx = island_idx % len(self.islands)  # Ensure valid island
 
+        # Novelty check before adding
+        if not self._is_novel(program, island_idx):
+            logger.debug(f"Program {program.id} failed in novelty check and won't be added in the island {island_idx}")
+            return program.id  # Do not add non-novel program
+
         # Add to island-specific feature map (replacing existing if better)
         feature_key = self._feature_coords_to_key(feature_coords)
         island_feature_map = self.island_feature_maps[island_idx]
@@ -931,6 +946,120 @@ def _feature_coords_to_key(self, coords: List[int]) -> str:
         """
         return "-".join(str(c) for c in coords)
 
+    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
+        """
+        Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+        Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/database/dbase.py#L1452
+
+        Compute cosine similarity between two vectors.
+        """
+        if not vec1 or not vec2 or len(vec1) != len(vec2):
+            return 0.0
+
+        arr1 = np.array(vec1, dtype=np.float32)
+        arr2 = np.array(vec2, dtype=np.float32)
+
+        norm_a = np.linalg.norm(arr1)
+        norm_b = np.linalg.norm(arr2)
+
+        if norm_a == 0 or norm_b == 0:
+            return 0.0
+
+        similarity = np.dot(arr1, arr2) / (norm_a * norm_b)
+
+        return float(similarity)
+
+    def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool:
+        """
+        Use an LLM to judge if a program is novel compared to a similar existing program
+        """
+        import asyncio
+        from openevolve.novelty_judge import NOVELTY_SYSTEM_MSG, NOVELTY_USER_MSG
+
+        user_msg = NOVELTY_USER_MSG.format(
+            language=program.language,
+            existing_code=similar_program.code,
+            proposed_code=program.code,
+        )
+
+        try:
+            content: str = asyncio.run(
+                self.novelty_llm.generate_with_context(
+                    system_msg=NOVELTY_SYSTEM_MSG,
+                    messages=[{"role": "user", "content": user_msg}],
+                )
+            )
+
+            if not content:
+                logger.warning("Novelty LLM returned empty response")
+                return True
+
+            content = content.strip()
+
+            # Parse the response
+            NOVEL_i = content.upper().find("NOVEL")
+            NOT_NOVEL_i = content.upper().find("NOT_NOVEL")
+
+            if NOVEL_i == -1 and NOT_NOVEL_i == -1:
+                logger.warning(f"Unexpected novelty LLM response: {content}")
+                return True  # Assume novel if we can't parse
+
+            if NOVEL_i != -1 and NOT_NOVEL_i != -1:
+                # Both found, take the one that appears first
+                is_novel = NOVEL_i < NOT_NOVEL_i
+            elif NOVEL_i != -1:
+                is_novel = True
+            else:
+                is_novel = False
+
+            return is_novel
+
+        except Exception as e:
+            logger.error(f"Error in novelty LLM check: {e}")
+
+        return True
+
+    def _is_novel(self, program_id: str, island_idx: int) -> bool:
+        """
+        Determine if a program is novel based on diversity to existing programs
+
+        Args:
+            program_id: ID of the program to check
+            island_idx: Island index
+
+        Returns:
+            True if novel, False otherwise
+        """
+        if self.embedding_client is None or self.similarity_threshold <= 0.0:
+            # Novelty checking disabled
+            return True
+
+        program = self.programs[program_id]
+        embd = self.embedding_client.get_embedding(program.code)
+        self.programs[program_id].embedding = embd
+
+        max_smlty = float('-inf')
+        max_smlty_pid = None
+
+        for pid in self.islands[island_idx]:
+            other = self.programs[pid]
+
+            if other.embedding is None:
+                logger.warning("Program %s has no embedding, skipping similarity check", other.id)
+                continue
+
+            similarity = self._cosine_similarity(embd, other.embedding)
+
+            if similarity >= max(max_smlty, self.similarity_threshold):
+                max_smlty = similarity
+                max_smlty_pid = pid
+
+        if max_smlty_pid is None:
+            # No similar programs found, consider it novel
+            return True
+
+        return self._llm_judge_novelty(program, self.programs[max_smlty_pid])
+
     def _is_better(self, program1: Program, program2: Program) -> bool:
         """
         Determine if program1 has better FITNESS than program2
diff --git a/openevolve/embedding.py b/openevolve/embedding.py
new file mode 100644
index 000000000..c66025a37
--- /dev/null
+++ b/openevolve/embedding.py
@@ -0,0 +1,91 @@
+"""
+Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License)
+Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py
+"""
+
+import os
+import openai
+from typing import Union, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+M = 1_000_000
+
+OPENAI_EMBEDDING_MODELS = [
+    "text-embedding-3-small",
+    "text-embedding-3-large",
+]
+
+AZURE_EMBEDDING_MODELS = [
+    "azure-text-embedding-3-small",
+    "azure-text-embedding-3-large",
+]
+
+OPENAI_EMBEDDING_COSTS = {
+    "text-embedding-3-small": 0.02 / M,
+    "text-embedding-3-large": 0.13 / M,
+}
+
+class EmbeddingClient:
+    def __init__(
+        self, model_name: str = "text-embedding-3-small"):
+        """
+        Initialize the EmbeddingClient.
+
+        Args:
+            model_name (str): The OpenAI embedding model name to use.
+        """
+        self.client, self.model = self._get_client_model(model_name)
+
+    def _get_client_model(model_name: str) -> tuple[openai.OpenAI, str]:
+        if model_name in OPENAI_EMBEDDING_MODELS:
+            client = openai.OpenAI()
+            model_to_use = model_name
+        elif model_name in AZURE_EMBEDDING_MODELS:
+            # get rid of the azure- prefix
+            model_to_use = model_name.split("azure-")[-1]
+            client = openai.AzureOpenAI(
+                api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+                api_version=os.getenv("AZURE_API_VERSION"),
+                azure_endpoint=os.getenv("AZURE_API_ENDPOINT"),
+            )
+        else:
+            raise ValueError(f"Invalid embedding model: {model_name}")
+
+        return client, model_to_use
+
+    def get_embedding(
+        self, code: Union[str, List[str]]
+    ) -> Union[List[float], List[List[float]]]:
+        """
+        Computes the text embedding for a code string.
+
+        Args:
+            code (str, list[str]): The code as a string or list
+                of strings.
+
+        Returns:
+            list: Embedding vector for the code, or an empty list if an
+                error occurs.
+        """
+        if isinstance(code, str):
+            code = [code]
+            single_code = True
+        else:
+            single_code = False
+        try:
+            response = self.client.embeddings.create(
+                model=self.model, input=code, encoding_format="float"
+            )
+            # Extract embedding from response
+            if single_code:
+                return response.data[0].embedding
+            else:
+                return [d.embedding for d in response.data]
+        except Exception as e:
+            logger.error(f"Error getting embedding: {e}")
+            if single_code:
+                return []
+            else:
+                return [[]]
diff --git a/openevolve/novelty_judge.py b/openevolve/novelty_judge.py
new file mode 100644
index 000000000..aadf70bfe
--- /dev/null
+++ b/openevolve/novelty_judge.py
@@ -0,0 +1,40 @@
+"""
+Prompt templates for novelty judging using LLMs.
+"""
+
+NOVELTY_SYSTEM_MSG = """You are an expert code reviewer tasked with determining if two code snippets are meaningfully different from each other.
+
+Your job is to analyze both programs and determine if the proposed code introduces meaningful changes compared to the existing code. Consider:
+
+1. **Algorithmic differences**: Different approaches, logic, or strategies
+2. **Structural changes**: Different data structures, control flow, or organization
+3. **Functional improvements**: New features, optimizations, or capabilities
+4. **Implementation variations**: Different ways of achieving the same goal that could lead to different performance characteristics
+5. **Hyperparameter changes**: Different hyperparameters that could lead to different performance characteristics
+
+Ignore trivial differences like:
+- Variable name changes
+- Minor formatting or style changes
+- Comments or documentation changes
+- Insignificant refactoring that doesn't change the core logic
+
+Respond with:
+- **NOVEL**: If the codes are meaningfully different
+- **NOT_NOVEL**: If the codes are essentially the same with only trivial differences
+
+After your decision, provide a brief explanation of your reasoning."""
+
+
+NOVELTY_USER_MSG = """Please analyze these two code snippets:
+
+**EXISTING CODE:**
+```{language}
+{existing_code}
+```
+
+**PROPOSED CODE:**
+```{language}
+{proposed_code}
+```
+
+Are these codes meaningfully different?
Respond with NOVEL or NOT_NOVEL followed by your explanation.""" From 27b6f29b15994d0dc207feab1bf26fdc530d8699 Mon Sep 17 00:00:00 2001 From: bluebread Date: Fri, 10 Oct 2025 04:35:51 +0000 Subject: [PATCH 2/3] Fix novelty feature bugs and update test configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix novelty check in database.py to pass program.id instead of program object - Fix missing self parameter in embedding.py _get_client_model method - Fix serialization error by clearing novelty_llm before config deepcopy - Update function_minimization config for testing with different models 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- examples/function_minimization/config.yaml | 11 +++++++---- openevolve/database.py | 2 +- openevolve/embedding.py | 2 +- openevolve/process_parallel.py | 4 ++++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/examples/function_minimization/config.yaml b/examples/function_minimization/config.yaml index 13ff18468..42fb739de 100644 --- a/examples/function_minimization/config.yaml +++ b/examples/function_minimization/config.yaml @@ -1,17 +1,20 @@ # Configuration for function minimization example -max_iterations: 50 +max_iterations: 10 checkpoint_interval: 5 # LLM configuration llm: - primary_model: "gemini-2.5-flash-lite" + # primary_model: "gemini-2.5-flash-lite" + primary_model: "gpt-5-mini" # primary_model: "llama3.1-8b" primary_model_weight: 0.8 - secondary_model: "gemini-2.5-flash" + # secondary_model: "gemini-2.5-flash" # secondary_model: "llama-4-scout-17b-16e-instruct" + secondary_model: "gpt-5-nano" secondary_model_weight: 0.2 - api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" + # api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" # api_base: "https://api.cerebras.ai/v1" + api_base: "https://api.openai.com/v1" temperature: 0.7 max_tokens: 16000 timeout: 120 diff --git a/openevolve/database.py b/openevolve/database.py index e472afb2a..a1f793cc3 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -251,7 +251,7 @@ def add( island_idx = island_idx % len(self.islands) # Ensure valid island # Novelty check before adding - if not self._is_novel(program, island_idx): + if not self._is_novel(program.id, island_idx): logger.debug(f"Program {program.id} failed in novelty check and won't be added in the island {island_idx}") return program.id # Do not add non-novel program diff --git a/openevolve/embedding.py b/openevolve/embedding.py index c66025a37..74d3c57ab 100644 --- a/openevolve/embedding.py +++ b/openevolve/embedding.py @@ -38,7 +38,7 @@ def __init__( """ self.client, self.model = self._get_client_model(model_name) - def _get_client_model(model_name: str) -> tuple[openai.OpenAI, str]: + def _get_client_model(self, model_name: str) -> tuple[openai.OpenAI, str]: if model_name in OPENAI_EMBEDDING_MODELS: client = openai.OpenAI() model_to_use = model_name diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index 7ff9ddf83..69766ce59 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -295,6 +295,10 @@ def __init__(self, config: Config, evaluation_file: str, database: ProgramDataba def _serialize_config(self, config: Config) -> dict: """Serialize config object to a dictionary that can be pickled""" # Manual serialization to handle nested objects properly + + # The asdict() call itself triggers the deepcopy which tries to serialize novelty_llm. 
Remove it first. + config.database.novelty_llm = None + return { "llm": { "models": [asdict(m) for m in config.llm.models], From 1ea8cc607a656f8f6b8859c1a0a7c1fd12d45d80 Mon Sep 17 00:00:00 2001 From: bluebread Date: Fri, 10 Oct 2025 05:13:52 +0000 Subject: [PATCH 3/3] Add missing attribution and source information in novelty judging prompt --- openevolve/novelty_judge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openevolve/novelty_judge.py b/openevolve/novelty_judge.py index aadf70bfe..9075607b2 100644 --- a/openevolve/novelty_judge.py +++ b/openevolve/novelty_judge.py @@ -1,4 +1,7 @@ """ +Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License) +Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/llm/embedding.py + Prompt templates for novelty judging using LLMs. """
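
To trace the behavior introduced by this series without applying the patches, the following minimal sketch replays the two-stage decision that `ProgramDatabase.add()` now performs: a cosine-similarity screen over code embeddings, followed by an LLM verdict only for candidates whose similarity to an island member reaches the configured threshold. This is an illustration only — the `cosine_similarity`/`is_novel` helpers, the toy vectors, and the `judge` callable are stand-ins for the real `EmbeddingClient` and LLM ensemble, and the verdict parsing is simplified relative to the `find()`-based logic in `_llm_judge_novelty`.

```python
from typing import Callable, List, Optional

import numpy as np


def cosine_similarity(a: List[float], b: List[float]) -> float:
    # Same formula as ProgramDatabase._cosine_similarity in patch 1.
    if not a or not b or len(a) != len(b):
        return 0.0
    a_arr = np.array(a, dtype=np.float32)
    b_arr = np.array(b, dtype=np.float32)
    denom = float(np.linalg.norm(a_arr) * np.linalg.norm(b_arr))
    return float(np.dot(a_arr, b_arr)) / denom if denom else 0.0


def is_novel(
    candidate: List[float],
    island_embeddings: List[List[float]],
    threshold: float,
    judge: Callable[[], str],
) -> bool:
    # Stage 1: embedding screen; only candidates at or above the threshold
    # are forwarded to the (more expensive) LLM judge.
    best: Optional[float] = None
    for existing in island_embeddings:
        sim = cosine_similarity(candidate, existing)
        if sim >= threshold and (best is None or sim > best):
            best = sim
    if best is None:
        return True  # nothing similar enough, accept without asking the LLM
    # Stage 2: LLM verdict on the closest match (simplified parsing).
    verdict = judge().upper()
    return "NOT_NOVEL" not in verdict


# Toy usage: nearly identical embeddings trip the 0.99 threshold from the
# example config, so the verdict comes from the judge and the program is rejected.
print(is_novel([1.0, 0.0], [[0.999, 0.01]], threshold=0.99, judge=lambda: "NOT_NOVEL: trivial rename"))
```

With real inputs, `judge` would format `NOVELTY_USER_MSG` with both programs' code and send it to the novelty LLM, as `_llm_judge_novelty` does in patch 1.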