From eb1421b0e4f4b4111ce4c8c43bc828210b1f1b78 Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Sun, 25 May 2025 23:49:44 -0400
Subject: [PATCH 1/6] init

---
 examples/function_minimization/README.md    | 81 +++++++++++----------
 examples/function_minimization/evaluator.py | 59 ++++++++++-----
 2 files changed, 84 insertions(+), 56 deletions(-)

diff --git a/examples/function_minimization/README.md b/examples/function_minimization/README.md
index 3209a9298..200560782 100644
--- a/examples/function_minimization/README.md
+++ b/examples/function_minimization/README.md
@@ -62,21 +62,22 @@ def search_algorithm(iterations=1000, bounds=(-5, 5)):
 After running OpenEvolve, it discovered a simulated annealing algorithm with a completely different approach:
 
 ```python
-def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_temperature=100, cooling_rate=0.99):
+def search_algorithm(bounds=(-5, 5), iterations=2000, initial_temperature=100, cooling_rate=0.97, step_size_factor=0.2, step_size_increase_threshold=20):
     """
     Simulated Annealing algorithm for function minimization.
 
     Args:
         bounds: Bounds for the search space (min, max)
         iterations: Number of iterations to run
-        step_size: Step size for perturbing the solution
         initial_temperature: Initial temperature for the simulated annealing process
         cooling_rate: Cooling rate for the simulated annealing process
-
+        step_size_factor: Factor to scale the initial step size by the range
+        step_size_increase_threshold: Number of iterations without improvement before increasing step size
+
     Returns:
         Tuple of (best_x, best_y, best_value)
     """
-    # Initialize with a random point
+    # Initialize
     best_x = np.random.uniform(bounds[0], bounds[1])
     best_y = np.random.uniform(bounds[0], bounds[1])
     best_value = evaluate_function(best_x, best_y)
@@ -84,34 +85,50 @@ def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_
     current_x, current_y = best_x, best_y
     current_value = best_value
     temperature = initial_temperature
+    step_size = (bounds[1] - bounds[0]) * step_size_factor  # Initial step size
+    min_temperature = 1e-6  # Avoid premature convergence
+    no_improvement_count = 0  # Counter for tracking stagnation
+
+    for i in range(iterations):
+        # Adaptive step size and temperature control
+        if i > iterations * 0.75:  # Reduce step size towards the end
+            step_size *= 0.5
+        if no_improvement_count > step_size_increase_threshold:  # Increase step size if stuck
+            step_size *= 1.1
+            no_improvement_count = 0  # Reset the counter
+
+        step_size = min(step_size, (bounds[1] - bounds[0]) * 0.5)  # Limit step size
 
-    for _ in range(iterations):
-        # Perturb the current solution
         new_x = current_x + np.random.uniform(-step_size, step_size)
         new_y = current_y + np.random.uniform(-step_size, step_size)
 
-        # Ensure the new solution is within bounds
+        # Keep the new points within the bounds
         new_x = max(bounds[0], min(new_x, bounds[1]))
         new_y = max(bounds[0], min(new_y, bounds[1]))
 
         new_value = evaluate_function(new_x, new_y)
 
-        # Calculate the acceptance probability
         if new_value < current_value:
+            # Accept the move if it's better
            current_x, current_y = new_x, new_y
            current_value = new_value
+            no_improvement_count = 0  # Reset counter
 
            if new_value < best_value:
+                # Update the best found solution
                best_x, best_y = new_x, new_y
                best_value = new_value
        else:
+            # Accept with a certain probability (Simulated Annealing)
            probability = np.exp((current_value - new_value) / temperature)
            if np.random.rand() < probability:
                current_x, current_y = new_x, new_y
                current_value = new_value
+                no_improvement_count = 0  # Reset counter
+            else:
+                no_improvement_count += 1  # Increment counter if not improving
 
-        # Cool down the temperature
-        temperature *= cooling_rate
+        temperature = max(temperature * cooling_rate, min_temperature)  # Cool down
 
     return best_x, best_y, best_value
 ```
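A note on the cooling schedule added above: under geometric cooling the temperature after `i` iterations is `initial_temperature * cooling_rate**i`, so with these defaults the search is effectively greedy long before the final iteration, and the `min_temperature` floor mainly guards the acceptance test against numerical underflow. A standalone sketch of the arithmetic (illustrative, not part of the patch):

```python
# Geometric cooling: T_i = T_0 * cooling_rate**i
T0, cooling_rate, iterations = 100, 0.97, 2000
T_final = T0 * cooling_rate**iterations
print(T_final)             # ~3.5e-25: uphill moves are all but impossible by the end
print(max(T_final, 1e-6))  # the min_temperature floor keeps exp(dv / T) well-behaved
                           # even if the iteration budget grows much larger
```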
@@ -120,29 +137,17 @@ def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_
 
 Through evolutionary iterations, OpenEvolve discovered several key algorithmic concepts:
 
-1. **Local Search**: Instead of random sampling across the entire space, the evolved algorithm makes small perturbations to promising solutions:
-   ```python
-   new_x = current_x + np.random.uniform(-step_size, step_size)
-   new_y = current_y + np.random.uniform(-step_size, step_size)
-   ```
-
-2. **Temperature-based Acceptance**: The algorithm can escape local minima by occasionally accepting worse solutions:
-   ```python
-   probability = np.exp((current_value - new_value) / temperature)
-   if np.random.rand() < probability:
-       current_x, current_y = new_x, new_y
-       current_value = new_value
-   ```
-
-3. **Cooling Schedule**: The temperature gradually decreases, transitioning from exploration to exploitation:
-   ```python
-   temperature *= cooling_rate
-   ```
-
-4. **Parameter Introduction**: The system discovered the need for additional parameters to control the algorithm's behavior:
-   ```python
-   def simulated_annealing(bounds=(-5, 5), iterations=1000, step_size=0.1, initial_temperature=100, cooling_rate=0.99):
-   ```
+1. **Memory and Exploitation**: The evolved algorithm tracks and updates the best solution seen so far, allowing for continual improvement rather than random restarting.
+
+2. **Exploration via Temperature**: Simulated annealing uses a “temperature” parameter to allow uphill moves early in the search, helping escape local minima that would trap simpler methods.
+
+3. **Adaptive Step Size**: The step size is adjusted dynamically—shrinking as the search converges and expanding if progress stalls—leading to better coverage and faster convergence.
+
+4. **Bounded Moves**: The algorithm ensures all candidate solutions remain within the feasible domain, avoiding wasted evaluations.
+
+5. **Stagnation Handling**: By counting iterations without improvement, the algorithm responds by boosting exploration when progress stalls.
+
+6. **Probabilistic Acceptance**: Moves to worse solutions are allowed with a probability that decays over time, providing a principled way to balance exploration and exploitation.
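Concepts 1 and 6 are listed here without snippets (PATCH 5/6 below adds snippets for several of the others). For reference, the best-so-far bookkeeping that concept 1 describes is this fragment of the evolved program above:

```python
if new_value < best_value:
    # Update the best found solution
    best_x, best_y = new_x, new_y
    best_value = new_value
```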
 
 ## Results
 
@@ -150,11 +155,13 @@
 The evolved algorithm shows substantial improvement in finding better solutions:
 
 | Metric | Value |
 |--------|-------|
-| Value Score | 0.677 |
-| Distance Score | 0.258 |
+| Value Score | 0.990 |
+| Distance Score | 0.921 |
+| Standard Deviation Score | 0.900 |
+| Speed Score | 0.466 |
 | Reliability Score | 1.000 |
-| Overall Score | 0.917 |
-| Combined Score | 0.584 |
+| Overall Score | 0.984 |
+| Combined Score | 0.922 |
 
 The simulated annealing algorithm:
 - Achieves higher quality solutions (closer to the global minimum)
diff --git a/examples/function_minimization/evaluator.py b/examples/function_minimization/evaluator.py
index 0bd91ef79..2075a4f32 100644
--- a/examples/function_minimization/evaluator.py
+++ b/examples/function_minimization/evaluator.py
@@ -5,10 +5,8 @@
 import importlib.util
 import numpy as np
 import time
-import concurrent.futures
-import threading
+import multiprocessing
 import traceback
-import sys
 
 
 def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
@@ -24,14 +22,30 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
     Returns:
         Result of the function or raises TimeoutError
     """
-    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-        future = executor.submit(func, *args, **kwargs)
+    def wrapper(queue, func, args, kwargs):
         try:
-            return future.result(timeout=timeout_seconds)
-        except concurrent.futures.TimeoutError:
-            raise TimeoutError(
-                f"Function {func.__name__} timed out after {timeout_seconds} seconds"
-            )
+            result = func(*args, **kwargs)
+            queue.put(('success', result))
+        except Exception as e:
+            queue.put(('error', e))
+
+    queue = multiprocessing.Queue()
+    process = multiprocessing.Process(target=wrapper, args=(queue, func, args, kwargs))
+    process.start()
+    process.join(timeout=timeout_seconds)
+
+    if process.is_alive():
+        process.terminate()
+        process.join()
+        raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")
+
+    if queue.empty():
+        raise TimeoutError("Function ended without returning a result")
+
+    status, result = queue.get()
+    if status == 'error':
+        raise result
+    return result
 
 
 def safe_float(value):
@@ -78,6 +92,8 @@ def evaluate(program_path):
 
     # Run multiple trials
     num_trials = 10
+    x_values = []
+    y_values = []
     values = []
     distances = []
     times = []
@@ -119,14 +135,15 @@ def evaluate(program_path):
                 continue
 
             # Calculate metrics
-            x_diff = safe_float(x) - GLOBAL_MIN_X
-            y_diff = safe_float(y) - GLOBAL_MIN_Y
+            x_diff = x - GLOBAL_MIN_X
+            y_diff = y - GLOBAL_MIN_Y
             distance_to_global = np.sqrt(x_diff**2 + y_diff**2)
-            value_difference = abs(value - GLOBAL_MIN_VALUE)
 
-            values.append(float(value))
-            distances.append(float(distance_to_global))
-            times.append(float(end_time - start_time))
+            x_values.append(x)
+            y_values.append(y)
+            values.append(value)
+            distances.append(distance_to_global)
+            times.append(end_time - start_time)
             success_count += 1
 
         except TimeoutError as e:
@@ -164,6 +181,11 @@ def evaluate(program_path):
     distance_score = float(1.0 / (1.0 + avg_distance))
     speed_score = float(1.0 / avg_time) if avg_time > 0 else 0.0
 
+    # Calculate standard deviation scores
+    x_std_score = float(1.0 / (1.0 + np.std(x_values)))
+    y_std_score = float(1.0 / (1.0 + np.std(y_values)))
+    standard_deviation_score = (x_std_score + y_std_score) / 2.0
+
     # Normalize speed score (so it doesn't dominate)
     speed_score = float(min(speed_score, 10.0) / 10.0)
 
@@ -175,7 +197,7 @@ def evaluate(program_path):
-    # Value and distance scores (quality of solution) get 90% of the weight
+    # Value, distance, and standard deviation scores (quality of solution) get 90% of the weight
     # Speed and reliability get only 10% combined
     combined_score = float(
-        0.6 * value_score + 0.3 * distance_score + 0.05 * speed_score + 0.05 * reliability_score
+        0.35 * value_score + 0.35 * distance_score + standard_deviation_score * 0.20 + 0.05 * speed_score + 0.05 * reliability_score
     )
 
     # Also compute an "overall" score that will be the primary metric for selection
@@ -194,6 +216,7 @@ def evaluate(program_path):
     return {
         "value_score": value_score,
         "distance_score": distance_score,
+        "standard_deviation_score": standard_deviation_score,
         "speed_score": speed_score,
         "reliability_score": reliability_score,
         "combined_score": combined_score,
@@ -282,8 +305,6 @@ def evaluate_stage1(program_path):
         # Basic metrics with overall score
         return {
             "runs_successfully": 1.0,
-            "value": float(value),
-            "distance": distance,
             "value_score": value_score,
             "distance_score": distance_score,
             "overall_score": solution_quality,  # This becomes a strong guiding metric
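The switch from `concurrent.futures` threads to `multiprocessing` above is what makes the timeout enforceable: a timed-out thread keeps running in the background, while a timed-out child process can be terminated. A minimal usage sketch of the reworked helper; the `slow` function and the argument values are illustrative, not part of the patch:

```python
import time

from evaluator import run_with_timeout  # assuming the module above is importable

def slow(seconds):
    """Stand-in for a candidate program that may hang."""
    time.sleep(seconds)
    return seconds

if __name__ == "__main__":
    # Finishes well under the limit, so the result comes back normally
    print(run_with_timeout(slow, args=(1,), timeout_seconds=5))  # -> 1
    try:
        # Exceeds the limit; the child process is terminated
        run_with_timeout(slow, args=(60,), timeout_seconds=5)
    except TimeoutError as e:
        print(e)  # "Function timed out after 5 seconds"

# Caveat: under the spawn start method (macOS/Windows default), the target and
# its arguments must be picklable and defined at module top level.
```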
From 2246b5f74da43445b1ea933450352a537e0aefe8 Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Sun, 25 May 2025 23:52:55 -0400
Subject: [PATCH 2/6] black format

---
 examples/function_minimization/evaluator.py | 21 +++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/examples/function_minimization/evaluator.py b/examples/function_minimization/evaluator.py
index 2075a4f32..ff80a081d 100644
--- a/examples/function_minimization/evaluator.py
+++ b/examples/function_minimization/evaluator.py
@@ -22,28 +22,29 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
     Returns:
         Result of the function or raises TimeoutError
     """
+
     def wrapper(queue, func, args, kwargs):
         try:
             result = func(*args, **kwargs)
-            queue.put(('success', result))
+            queue.put(("success", result))
         except Exception as e:
-            queue.put(('error', e))
-
+            queue.put(("error", e))
+
     queue = multiprocessing.Queue()
     process = multiprocessing.Process(target=wrapper, args=(queue, func, args, kwargs))
     process.start()
     process.join(timeout=timeout_seconds)
-
+
     if process.is_alive():
         process.terminate()
         process.join()
         raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")
-
+
     if queue.empty():
         raise TimeoutError("Function ended without returning a result")
-
+
     status, result = queue.get()
-    if status == 'error':
+    if status == "error":
         raise result
     return result
@@ -197,7 +198,11 @@ def evaluate(program_path):
     # Value, distance, and standard deviation scores (quality of solution) get 90% of the weight
     # Speed and reliability get only 10% combined
     combined_score = float(
-        0.35 * value_score + 0.35 * distance_score + standard_deviation_score * 0.20 + 0.05 * speed_score + 0.05 * reliability_score
+        0.35 * value_score
+        + 0.35 * distance_score
+        + standard_deviation_score * 0.20
+        + 0.05 * speed_score
+        + 0.05 * reliability_score
     )
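As a sanity check on the reweighting, plugging the metrics reported in the README's results table into the reformatted expression above reproduces the table's combined score:

```python
# Scores from the README results table in PATCH 1/6
value_score, distance_score = 0.990, 0.921
standard_deviation_score, speed_score, reliability_score = 0.900, 0.466, 1.000

combined_score = (
    0.35 * value_score
    + 0.35 * distance_score
    + standard_deviation_score * 0.20
    + 0.05 * speed_score
    + 0.05 * reliability_score
)
print(round(combined_score, 3))  # 0.922, matching the table
```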
From f877c6cad5e3d86f5e4f731349582ff486ffcc41 Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Mon, 26 May 2025 00:08:23 -0400
Subject: [PATCH 3/6] change Evolve scope

---
 examples/function_minimization/initial_program.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/function_minimization/initial_program.py b/examples/function_minimization/initial_program.py
index fef4d87c6..1de495608 100644
--- a/examples/function_minimization/initial_program.py
+++ b/examples/function_minimization/initial_program.py
@@ -31,16 +31,15 @@ def search_algorithm(iterations=1000, bounds=(-5, 5)):
 
     return best_x, best_y, best_value
 
+# EVOLVE-BLOCK-END
+
+# This part remains fixed (not evolved)
 
 def evaluate_function(x, y):
     """The complex function we're trying to minimize"""
     return np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
 
 
-# EVOLVE-BLOCK-END
-
-
-# This part remains fixed (not evolved)
 def run_search():
     x, y, value = search_algorithm()
     return x, y, value

From ba014037670dd12e592c0d03e18735774400447b Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Mon, 26 May 2025 08:08:00 -0400
Subject: [PATCH 4/6] black reformat

---
 examples/function_minimization/initial_program.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/function_minimization/initial_program.py b/examples/function_minimization/initial_program.py
index 1de495608..670c02cc4 100644
--- a/examples/function_minimization/initial_program.py
+++ b/examples/function_minimization/initial_program.py
@@ -31,6 +31,7 @@ def search_algorithm(iterations=1000, bounds=(-5, 5)):
 
     return best_x, best_y, best_value
 
+
 # EVOLVE-BLOCK-END
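Patches 3 and 4 shrink the evolved region so that `evaluate_function`, the objective itself, can no longer be rewritten during evolution. Schematically, `initial_program.py` now has the layout sketched below; the `# EVOLVE-BLOCK-START` opener sits above the hunk shown and is assumed here:

```python
import numpy as np

# EVOLVE-BLOCK-START
def search_algorithm(iterations=1000, bounds=(-5, 5)):
    ...  # the search logic OpenEvolve is allowed to rewrite
# EVOLVE-BLOCK-END


# This part remains fixed (not evolved)
def evaluate_function(x, y):
    """The complex function we're trying to minimize"""
    return np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
```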
From e5bef4f9ec29a83ccd1aaec77fc524568aaaebcc Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Mon, 26 May 2025 21:37:47 -0400
Subject: [PATCH 5/6] add examples to README

---
 examples/function_minimization/README.md | 38 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/examples/function_minimization/README.md b/examples/function_minimization/README.md
index 200560782..051a664e5 100644
--- a/examples/function_minimization/README.md
+++ b/examples/function_minimization/README.md
@@ -137,17 +137,33 @@ def search_algorithm(bounds=(-5, 5), iterations=2000, initial_temperature=100, c
 
 Through evolutionary iterations, OpenEvolve discovered several key algorithmic concepts:
 
-1. **Memory and Exploitation**: The evolved algorithm tracks and updates the best solution seen so far, allowing for continual improvement rather than random restarting.
-
-2. **Exploration via Temperature**: Simulated annealing uses a “temperature” parameter to allow uphill moves early in the search, helping escape local minima that would trap simpler methods.
-
-3. **Adaptive Step Size**: The step size is adjusted dynamically—shrinking as the search converges and expanding if progress stalls—leading to better coverage and faster convergence.
-
-4. **Bounded Moves**: The algorithm ensures all candidate solutions remain within the feasible domain, avoiding wasted evaluations.
-
-5. **Stagnation Handling**: By counting iterations without improvement, the algorithm responds by boosting exploration when progress stalls.
-
-6. **Probabilistic Acceptance**: Moves to worse solutions are allowed with a probability that decays over time, providing a principled way to balance exploration and exploitation.
+1. **Exploration via Temperature**: Simulated annealing uses a `temperature` parameter to allow uphill moves early in the search, helping escape local minima that would trap simpler methods.
+   ```python
+   probability = np.exp((current_value - new_value) / temperature)
+   ```
+
+2. **Adaptive Step Size**: The step size is adjusted dynamically—shrinking as the search converges and expanding if progress stalls—leading to better coverage and faster convergence.
+   ```python
+   if i > iterations * 0.75:  # Reduce step size towards the end
+        step_size *= 0.5
+   if no_improvement_count > step_size_increase_threshold:  # Increase step size if stuck
+       step_size *= 1.1
+       no_improvement_count = 0  # Reset the counter
+   ```
+
+3. **Bounded Moves**: The algorithm ensures all candidate solutions remain within the feasible domain, avoiding wasted evaluations.
+   ```python
+   # Keep the new points within the bounds
+   new_x = max(bounds[0], min(new_x, bounds[1]))
+   new_y = max(bounds[0], min(new_y, bounds[1]))
+   ```
+
+4. **Stagnation Handling**: By counting iterations without improvement, the algorithm responds by boosting exploration when progress stalls.
+   ```python
+   if no_improvement_count > step_size_increase_threshold:  # Increase step size if stuck
+       step_size *= 1.1
+       no_improvement_count = 0  # Reset the counter
+   ```
 
 ## Results

From f2f10de52689d78f64a05fb9f4ab08533dc7c675 Mon Sep 17 00:00:00 2001
From: MashAliK <42744726+MashAliK@users.noreply.github.com>
Date: Mon, 26 May 2025 21:41:47 -0400
Subject: [PATCH 6/6] fix indentation

---
 examples/function_minimization/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/function_minimization/README.md b/examples/function_minimization/README.md
index 051a664e5..9eb80fb2b 100644
--- a/examples/function_minimization/README.md
+++ b/examples/function_minimization/README.md
@@ -145,7 +145,7 @@ Through evolutionary iterations, OpenEvolve discovered several key algorithmic c
 2. **Adaptive Step Size**: The step size is adjusted dynamically—shrinking as the search converges and expanding if progress stalls—leading to better coverage and faster convergence.
    ```python
    if i > iterations * 0.75:  # Reduce step size towards the end
-        step_size *= 0.5
+       step_size *= 0.5
    if no_improvement_count > step_size_increase_threshold:  # Increase step size if stuck
        step_size *= 1.1
        no_improvement_count = 0  # Reset the counter
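Finally, a note on the consistency metric this series adds to the evaluator: `standard_deviation_score` rewards programs whose per-trial optima cluster tightly across the 10 trials. An illustrative check with made-up values (not from a recorded run):

```python
import numpy as np

# Five hypothetical best_x results from repeated trials, tightly clustered
x_values = [-1.704, -1.710, -1.698, -1.705, -1.702]
x_std_score = 1.0 / (1.0 + np.std(x_values))
print(round(x_std_score, 3))  # 0.996: close to 1.0 because the spread is small
```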