From a472b962c0aba01343de941ad6f95cd9a203bc4e Mon Sep 17 00:00:00 2001 From: Kristiyan Kostadinov Date: Fri, 19 Dec 2025 12:02:57 +0200 Subject: [PATCH 1/2] feat: add option to augment prompt Adds an option to the environment config that allows users to augment the resolved prompts before they're sent out. --- runner/configuration/environment-config.ts | 21 +++++- runner/configuration/environment.ts | 81 +++++++++++++++++----- runner/orchestration/generate.ts | 18 +++-- 3 files changed, 97 insertions(+), 23 deletions(-) diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts index fefd21c..01fafc1 100644 --- a/runner/configuration/environment-config.ts +++ b/runner/configuration/environment-config.ts @@ -8,7 +8,9 @@ import { LocalExecutorConfig, localExecutorConfigSchema, } from '../orchestration/executors/local-executor-config.js'; -import {RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js'; +import {PromptDefinition, RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js'; +import type {Environment} from './environment.js'; +import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; export const environmentConfigSchema = z.object({ /** Display name for the environment. */ @@ -118,6 +120,13 @@ export const environmentConfigSchema = z.object({ }), ) .optional(), + + /** + * Function that can be used to augment prompts before they're evaluated. + */ + augmentExecutablePrompt: z + .function(z.tuple([z.custom()]), z.promise(z.string())) + .optional(), }); /** @@ -127,6 +136,16 @@ export const environmentConfigSchema = z.object({ export type EnvironmentConfig = z.infer & Partial; +/** Context passed to the `augmentExecutablePrompt` function. */ +export interface PromptAugmentationContext { + /** Definition being augmented. */ + promptDef: PromptDefinition; + /** Environment running the evaluation. */ + environment: Environment; + /** Runner that the user can use for augmentation. */ + runner: GenkitRunner; +} + /** Asserts that the specified data is a valid environment config. */ export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig { const validationResult = environmentConfigSchema diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts index fb46334..b4fcd46 100644 --- a/runner/configuration/environment.ts +++ b/runner/configuration/environment.ts @@ -14,11 +14,13 @@ import { import {UserFacingError} from '../utils/errors.js'; import {generateId} from '../utils/id-generation.js'; import {lazy} from '../utils/lazy-creation.js'; -import {EnvironmentConfig} from './environment-config.js'; +import {EnvironmentConfig, PromptAugmentationContext} from './environment-config.js'; import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js'; import {renderPromptTemplate} from './prompt-templating.js'; import {getSha256Hash} from '../utils/hashing.js'; import {DEFAULT_SUMMARY_MODEL} from './constants.js'; +import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; +import {getRunnerByName} from '../codegen/runner-creation.js'; interface CategoryConfig { name: string; @@ -73,6 +75,14 @@ export class Environment { /** Ratings configured at the environment level. */ private readonly ratings: Rating[]; + /** User-configured function used to augment prompts. */ + private readonly augmentExecutablePrompt: + | ((context: PromptAugmentationContext) => Promise) + | null; + + /** Runner that user can use to access an LLM to augment prompts. */ + private augmentationRunner: GenkitRunner | null = null; + constructor( rootPath: string, private readonly config: EnvironmentConfig & Required>, @@ -103,26 +113,27 @@ export class Environment { this.ratings = this.resolveRatings(config); this.ratingHash = this.getRatingHash(this.ratings, this.ratingCategories); this.analysisPrompts = this.resolveAnalysisPrompts(config); + this.augmentExecutablePrompt = config.augmentExecutablePrompt || null; this.validateRatingHash(this.ratingHash, config); } /** Prompts that should be executed as a part of the evaluation. */ - executablePrompts = lazy(async () => { + readonly executablePrompts = lazy(async () => { return this.resolveExecutablePrompts(this.config.executablePrompts); }); - systemPromptGeneration = lazy(async () => { + readonly systemPromptGeneration = lazy(async () => { return (await this.renderSystemPrompt(this.config.generationSystemPrompt)).result; }); - systemPromptRepair = lazy(async () => { + readonly systemPromptRepair = lazy(async () => { if (!this.config.repairSystemPrompt) { return 'Please fix the given errors and return the corrected code.'; } return (await this.renderSystemPrompt(this.config.repairSystemPrompt)).result; }); - systemPromptEditing = lazy(async () => { + readonly systemPromptEditing = lazy(async () => { if (!this.config.editingSystemPrompt) { return this.systemPromptGeneration(); } @@ -180,6 +191,14 @@ export class Environment { }); } + async destroy(): Promise { + await this.executor.destroy(); + + if (this.augmentationRunner) { + await this.augmentationRunner.dispose(); + } + } + /** * Gets the readable display name of a framework, based on its ID. * @param id ID to be resolved. @@ -209,16 +228,16 @@ export class Environment { * @param config Configuration for the environment. */ private async resolveExecutablePrompts( - prompts: EnvironmentConfig['executablePrompts'], + definitions: EnvironmentConfig['executablePrompts'], ): Promise { - const result: Promise[] = []; + const promptPromises: Promise[] = []; const envRatings = this.ratings; - for (const def of prompts) { + for (const def of definitions) { if (def instanceof MultiStepPrompt) { - result.push(this.getMultiStepPrompt(def, envRatings)); + promptPromises.push(this.getMultiStepPrompt(def, envRatings)); } else if (def instanceof EvalPromptWithMetadata) { - result.push( + promptPromises.push( Promise.resolve({ name: def.name, kind: 'single', @@ -243,10 +262,10 @@ export class Environment { name = def.name; } - result.push( + promptPromises.push( ...globSync(path, {cwd: this.rootPath}).map( async relativePath => - await this.getStepPromptDefinition( + await this.getSinglePromptDefinition( name ?? basename(relativePath, extname(relativePath)), relativePath, ratings, @@ -258,11 +277,39 @@ export class Environment { } } - return Promise.all(result); + const prompts = await Promise.all(promptPromises); + + if (this.augmentExecutablePrompt) { + const augmentationPromises: Promise[] = []; + const updatePrompt = (promptDef: PromptDefinition) => { + augmentationPromises.push( + this.augmentExecutablePrompt!({ + promptDef, + environment: this, + runner: this.augmentationRunner!, + }).then(text => (promptDef.prompt = text)), + ); + }; + this.augmentationRunner ??= await getRunnerByName('genkit'); + + for (const rootPrompt of prompts) { + if (rootPrompt.kind === 'multi-step') { + for (const promptDef of rootPrompt.steps) { + updatePrompt(promptDef); + } + } else { + updatePrompt(rootPrompt); + } + } + + await Promise.all(augmentationPromises); + } + + return prompts; } /** - * Creates a prompt definition for a given step. + * Creates a prompt definition for a single prompt. * * @param name Name of the prompt. * @param rootPath Root path of the project. @@ -270,7 +317,7 @@ export class Environment { * @param ratings Ratings to run against the definition. * @param isEditing Whether this is an editing or generation step. */ - private async getStepPromptDefinition( + private async getSinglePromptDefinition( name: string, relativePath: string, ratings: Rating[], @@ -345,11 +392,11 @@ export class Environment { if (stepNum === 0) { throw new UserFacingError('Multi-step prompts start with `step-1`.'); } - const step = await this.getStepPromptDefinition( + const step = await this.getSinglePromptDefinition( `${name}-step-${stepNum}`, join(def.directoryPath, current.name), ratings, - /*isEditing */ stepNum !== 1, + /* isEditing */ stepNum !== 1, stepMetadata, ); diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts index 4c6e930..42ba251 100644 --- a/runner/orchestration/generate.ts +++ b/runner/orchestration/generate.ts @@ -48,16 +48,24 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise< const cleanup = async () => { // Clean-up should never interrupt a potentially passing completion. try { - await env.executor.destroy(); - for (const cleanupFn of extraCleanupFns) { - await cleanupFn(); - } + await env.destroy(); } catch (e) { - console.error(`Failed to destroy executor: ${e}`); + console.error(`Failed to destroy environment: ${e}`); if (e instanceof Error) { console.error(e.stack); } } + + for (const cleanupFn of extraCleanupFns) { + try { + await cleanupFn(); + } catch (e) { + console.error(`Failed cleanup: ${e}`); + if (e instanceof Error) { + console.error(e.stack); + } + } + } }; // Ensure cleanup logic runs when the evaluation is aborted. From bab8e22c382021fbe20b05f035f4f05985be9f2e Mon Sep 17 00:00:00 2001 From: Kristiyan Kostadinov Date: Fri, 19 Dec 2025 12:39:35 +0200 Subject: [PATCH 2/2] feat: add function for augmenting generated files Adds the ability for the user to augment a generated response before it is evaluated. --- runner/configuration/environment-config.ts | 14 +++++++++++++- runner/configuration/environment.ts | 12 ++++++++++++ runner/orchestration/codegen.ts | 1 + runner/orchestration/generate-initial-files.ts | 3 ++- runner/orchestration/repair.ts | 2 +- 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/runner/configuration/environment-config.ts b/runner/configuration/environment-config.ts index 01fafc1..fd9bf2f 100644 --- a/runner/configuration/environment-config.ts +++ b/runner/configuration/environment-config.ts @@ -8,7 +8,12 @@ import { LocalExecutorConfig, localExecutorConfigSchema, } from '../orchestration/executors/local-executor-config.js'; -import {PromptDefinition, RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js'; +import { + LlmResponseFile, + PromptDefinition, + RatingContextFilter, + ReportContextFilter, +} from '../shared-interfaces.js'; import type {Environment} from './environment.js'; import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js'; @@ -127,6 +132,13 @@ export const environmentConfigSchema = z.object({ augmentExecutablePrompt: z .function(z.tuple([z.custom()]), z.promise(z.string())) .optional(), + + /** + * Function that can be used to augment generated files before they're evaluated. + */ + augmentGeneratedFile: z + .function(z.tuple([z.custom>()]), z.string()) + .optional(), }); /** diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts index b4fcd46..f7f39c1 100644 --- a/runner/configuration/environment.ts +++ b/runner/configuration/environment.ts @@ -5,6 +5,7 @@ import {Executor} from '../orchestration/executors/executor.js'; import {Rating, RatingCategory} from '../ratings/rating-types.js'; import { FrameworkInfo, + LlmResponseFile, MultiStepPromptDefinition, PromptDefinition, RatingContextFilter, @@ -83,6 +84,9 @@ export class Environment { /** Runner that user can use to access an LLM to augment prompts. */ private augmentationRunner: GenkitRunner | null = null; + /** User-provided callback for augmenting the LLM-generated files. */ + private readonly augmentFileCallback: ((file: LlmResponseFile) => string) | null; + constructor( rootPath: string, private readonly config: EnvironmentConfig & Required>, @@ -114,6 +118,7 @@ export class Environment { this.ratingHash = this.getRatingHash(this.ratings, this.ratingCategories); this.analysisPrompts = this.resolveAnalysisPrompts(config); this.augmentExecutablePrompt = config.augmentExecutablePrompt || null; + this.augmentFileCallback = config.augmentGeneratedFile || null; this.validateRatingHash(this.ratingHash, config); } @@ -191,6 +196,13 @@ export class Environment { }); } + /** Augments response files based on the user's configuration. */ + augmentResponseFiles(files: LlmResponseFile[]): void { + if (this.augmentFileCallback) { + files.forEach(file => (file.code = this.augmentFileCallback!(file))); + } + } + async destroy(): Promise { await this.executor.destroy(); diff --git a/runner/orchestration/codegen.ts b/runner/orchestration/codegen.ts index 60ae72c..fe19a94 100644 --- a/runner/orchestration/codegen.ts +++ b/runner/orchestration/codegen.ts @@ -129,6 +129,7 @@ export async function repairCodeWithAI( ); if (response.success) { + env.augmentResponseFiles(response.outputFiles); progress.log( promptDef, 'codegen', diff --git a/runner/orchestration/generate-initial-files.ts b/runner/orchestration/generate-initial-files.ts index b137ecb..6a0495d 100644 --- a/runner/orchestration/generate-initial-files.ts +++ b/runner/orchestration/generate-initial-files.ts @@ -75,6 +75,7 @@ export async function generateInitialFiles( ); if (response.success) { + env.augmentResponseFiles(response.outputFiles); progress.log( promptDef, 'codegen', @@ -90,7 +91,7 @@ export async function generateInitialFiles( } return { - files: response.outputFiles!, + files: response.outputFiles, usage: response.usage, reasoning: response.reasoning, toolLogs: response.toolLogs, diff --git a/runner/orchestration/repair.ts b/runner/orchestration/repair.ts index 13acc24..152acc1 100644 --- a/runner/orchestration/repair.ts +++ b/runner/orchestration/repair.ts @@ -126,7 +126,7 @@ async function handleRepairResponse( const newAttemptFiles = previousAttemptFiles.map(f => ({...f})); mergeRepairFiles(repairResponse.outputFiles, newAttemptFiles); - writeResponseFiles(directory, newAttemptFiles, env, rootPromptDef.name); + await writeResponseFiles(directory, newAttemptFiles, env, rootPromptDef.name); const buildResult = await runBuild( evalID,