feat: add option to augment prompt

crisbeto · crisbeto · commit a5c5f9c777e7 · 2025-12-19T18:51:57.000+02:00
Adds an `augmentExecutablePrompt` option to the environment config that allows users to augment the resolved executable prompts before they're evaluated.
diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts
@@ -8,7 +8,9 @@ import {
   LocalExecutorConfig,
   localExecutorConfigSchema,
 } from '../orchestration/executors/local-executor-config.js';
-import {RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js';
+import {PromptDefinition, RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js';
+import type {Environment} from './environment.js';
+import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
 
 export const environmentConfigSchema = z.object({
   /** Display name for the environment. */
@@ -118,6 +120,13 @@ export const environmentConfigSchema = z.object({
       }),
     )
     .optional(),
+
+  /**
+   * Function used to augment prompts before they're evaluated; resolves to the new prompt text.
+   */
+  augmentExecutablePrompt: z
+    .function(z.tuple([z.custom<PromptAugmentationContext>()]), z.promise(z.string()))
+    .optional(),
 });
 
 /**
@@ -127,6 +136,16 @@ export const environmentConfigSchema = z.object({
 export type EnvironmentConfig = z.infer<typeof environmentConfigSchema> &
   Partial<LocalExecutorConfig>;
 
+/** Context that `Environment` passes to the user-supplied `augmentExecutablePrompt` function. */
+export interface PromptAugmentationContext {
+  /** Prompt definition being augmented; the returned string replaces its `prompt` text. */
+  promptDef: PromptDefinition;
+  /** Environment that is resolving the executable prompts. */
+  environment: Environment;
+  /** Runner that the augmentation function can use to access an LLM. */
+  runner: GenkitRunner;
+}
+
 /** Asserts that the specified data is a valid environment config. */
 export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig {
   const validationResult = environmentConfigSchema
diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts
@@ -14,11 +14,13 @@ import {
 import {UserFacingError} from '../utils/errors.js';
 import {generateId} from '../utils/id-generation.js';
 import {lazy} from '../utils/lazy-creation.js';
-import {EnvironmentConfig} from './environment-config.js';
+import {EnvironmentConfig, PromptAugmentationContext} from './environment-config.js';
 import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
 import {renderPromptTemplate} from './prompt-templating.js';
 import {getSha256Hash} from '../utils/hashing.js';
 import {DEFAULT_SUMMARY_MODEL} from './constants.js';
+import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
+import {getRunnerByName} from '../codegen/runner-creation.js';
 
 interface CategoryConfig {
   name: string;
@@ -73,6 +75,14 @@ export class Environment {
   /** Ratings configured at the environment level. */
   private readonly ratings: Rating[];
 
+  /** User-configured function used to augment prompts. */
+  private readonly augmentExecutablePrompt:
+    | ((context: PromptAugmentationContext) => Promise<string>)
+    | null;
+
+  /** Runner that the user can use to access an LLM to augment prompts. */
+  private augmentationRunner: GenkitRunner | null = null;
+
   constructor(
     rootPath: string,
     private readonly config: EnvironmentConfig & Required<Pick<EnvironmentConfig, 'executor'>>,
@@ -103,26 +113,27 @@ export class Environment {
     this.ratings = this.resolveRatings(config);
     this.ratingHash = this.getRatingHash(this.ratings, this.ratingCategories);
     this.analysisPrompts = this.resolveAnalysisPrompts(config);
+    this.augmentExecutablePrompt = config.augmentExecutablePrompt || null;
     this.validateRatingHash(this.ratingHash, config);
   }
 
   /** Prompts that should be executed as a part of the evaluation. */
-  executablePrompts = lazy(async () => {
+  readonly executablePrompts = lazy(async () => {
     return this.resolveExecutablePrompts(this.config.executablePrompts);
   });
 
-  systemPromptGeneration = lazy(async () => {
+  readonly systemPromptGeneration = lazy(async () => {
     return (await this.renderSystemPrompt(this.config.generationSystemPrompt)).result;
   });
 
-  systemPromptRepair = lazy(async () => {
+  readonly systemPromptRepair = lazy(async () => {
     if (!this.config.repairSystemPrompt) {
       return 'Please fix the given errors and return the corrected code.';
     }
     return (await this.renderSystemPrompt(this.config.repairSystemPrompt)).result;
   });
 
-  systemPromptEditing = lazy(async () => {
+  readonly systemPromptEditing = lazy(async () => {
     if (!this.config.editingSystemPrompt) {
       return this.systemPromptGeneration();
     }
@@ -180,6 +191,14 @@ export class Environment {
     });
   }
 
+  async destroy(): Promise<void> {
+    try {
+      await this.executor.destroy();
+    } finally {
+      await this.augmentationRunner?.dispose(); // Runs even if the executor teardown rejects.
+    }
+  }
+
   /**
    * Gets the readable display name of a framework, based on its ID.
    * @param id ID to be resolved.
@@ -209,16 +228,16 @@ export class Environment {
    * @param config Configuration for the environment.
    */
   private async resolveExecutablePrompts(
-    prompts: EnvironmentConfig['executablePrompts'],
+    definitions: EnvironmentConfig['executablePrompts'],
   ): Promise<RootPromptDefinition[]> {
-    const result: Promise<RootPromptDefinition>[] = [];
+    const promptPromises: Promise<RootPromptDefinition>[] = [];
     const envRatings = this.ratings;
 
-    for (const def of prompts) {
+    for (const def of definitions) {
       if (def instanceof MultiStepPrompt) {
-        result.push(this.getMultiStepPrompt(def, envRatings));
+        promptPromises.push(this.getMultiStepPrompt(def, envRatings));
       } else if (def instanceof EvalPromptWithMetadata) {
-        result.push(
+        promptPromises.push(
           Promise.resolve({
             name: def.name,
             kind: 'single',
@@ -243,10 +262,10 @@ export class Environment {
           name = def.name;
         }
 
-        result.push(
+        promptPromises.push(
           ...globSync(path, {cwd: this.rootPath}).map(
             async relativePath =>
-              await this.getStepPromptDefinition(
+              await this.getSinglePromptDefinition(
                 name ?? basename(relativePath, extname(relativePath)),
                 relativePath,
                 ratings,
@@ -258,19 +277,47 @@ export class Environment {
       }
     }
 
-    return Promise.all(result);
+    const prompts = await Promise.all(promptPromises);
+
+    if (this.augmentExecutablePrompt) {
+      const augmentationPromises: Promise<unknown>[] = [];
+      const updatePrompt = (promptDef: PromptDefinition) => {
+        augmentationPromises.push(
+          this.augmentExecutablePrompt!({
+            promptDef,
+            environment: this,
+            runner: this.augmentationRunner!,
+          }).then(text => (promptDef.prompt = text)),
+        );
+      };
+      this.augmentationRunner ??= await getRunnerByName('genkit');
+
+      for (const rootPrompt of prompts) {
+        if (rootPrompt.kind === 'multi-step') {
+          for (const promptDef of rootPrompt.steps) {
+            updatePrompt(promptDef);
+          }
+        } else {
+          updatePrompt(rootPrompt);
+        }
+      }
+
+      await Promise.all(augmentationPromises);
+    }
+
+    return prompts;
   }
 
   /**
-   * Creates a prompt definition for a given step.
+   * Creates a prompt definition for a single prompt.
    *
    * @param name Name of the prompt.
    * @param rootPath Root path of the project.
    * @param relativePath Relative path to the prompt.
    * @param ratings Ratings to run against the definition.
    * @param isEditing Whether this is an editing or generation step.
    */
-  private async getStepPromptDefinition<Metadata>(
+  private async getSinglePromptDefinition<Metadata>(
     name: string,
     relativePath: string,
     ratings: Rating[],
@@ -345,11 +392,11 @@ export class Environment {
       if (stepNum === 0) {
         throw new UserFacingError('Multi-step prompts start with `step-1`.');
       }
-      const step = await this.getStepPromptDefinition(
+      const step = await this.getSinglePromptDefinition(
         `${name}-step-${stepNum}`,
         join(def.directoryPath, current.name),
         ratings,
-        /*isEditing */ stepNum !== 1,
+        /* isEditing */ stepNum !== 1,
         stepMetadata,
       );
 
diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts
@@ -48,16 +48,24 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
   const cleanup = async () => {
     // Clean-up should never interrupt a potentially passing completion.
     try {
-      await env.executor.destroy();
-      for (const cleanupFn of extraCleanupFns) {
-        await cleanupFn();
-      }
+      await env.destroy();
     } catch (e) {
-      console.error(`Failed to destroy executor: ${e}`);
+      console.error(`Failed to destroy environment: ${e}`);
       if (e instanceof Error) {
         console.error(e.stack);
       }
     }
+
+    for (const cleanupFn of extraCleanupFns) {
+      try {
+        await cleanupFn();
+      } catch (e) {
+        console.error(`Failed cleanup: ${e}`);
+        if (e instanceof Error) {
+          console.error(e.stack);
+        }
+      }
+    }
   };
 
   // Ensure cleanup logic runs when the evaluation is aborted.