Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion runner/configuration/environment-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@ import {
LocalExecutorConfig,
localExecutorConfigSchema,
} from '../orchestration/executors/local-executor-config.js';
import {RatingContextFilter, ReportContextFilter} from '../shared-interfaces.js';
import {
LlmResponseFile,
PromptDefinition,
RatingContextFilter,
ReportContextFilter,
} from '../shared-interfaces.js';
import type {Environment} from './environment.js';
import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js';

export const environmentConfigSchema = z.object({
/** Display name for the environment. */
Expand Down Expand Up @@ -118,6 +125,20 @@ export const environmentConfigSchema = z.object({
}),
)
.optional(),

/**
* Function that can be used to augment prompts before they're evaluated.
*/
augmentExecutablePrompt: z
.function(z.tuple([z.custom<PromptAugmentationContext>()]), z.promise(z.string()))
.optional(),

/**
* Function that can be used to augment generated files before they're evaluated.
*/
augmentGeneratedFile: z
.function(z.tuple([z.custom<Readonly<LlmResponseFile>>()]), z.string())
.optional(),
});

/**
Expand All @@ -127,6 +148,16 @@ export const environmentConfigSchema = z.object({
export type EnvironmentConfig = z.infer<typeof environmentConfigSchema> &
Partial<LocalExecutorConfig>;

/**
 * Context passed to the `augmentExecutablePrompt` function.
 *
 * The string that the function resolves to replaces the `prompt` text of `promptDef`.
 */
export interface PromptAugmentationContext {
/** Definition being augmented. Each step of a multi-step prompt is passed in separately. */
promptDef: PromptDefinition;
/** Environment running the evaluation. */
environment: Environment;
/** Runner that the user can use for augmentation, e.g. to access an LLM. */
runner: GenkitRunner;
}

/** Asserts that the specified data is a valid environment config. */
export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig {
const validationResult = environmentConfigSchema
Expand Down
93 changes: 76 additions & 17 deletions runner/configuration/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {Executor} from '../orchestration/executors/executor.js';
import {Rating, RatingCategory} from '../ratings/rating-types.js';
import {
FrameworkInfo,
LlmResponseFile,
MultiStepPromptDefinition,
PromptDefinition,
RatingContextFilter,
Expand All @@ -14,11 +15,13 @@ import {
import {UserFacingError} from '../utils/errors.js';
import {generateId} from '../utils/id-generation.js';
import {lazy} from '../utils/lazy-creation.js';
import {EnvironmentConfig} from './environment-config.js';
import {EnvironmentConfig, PromptAugmentationContext} from './environment-config.js';
import {EvalPromptWithMetadata, MultiStepPrompt} from './prompts.js';
import {renderPromptTemplate} from './prompt-templating.js';
import {getSha256Hash} from '../utils/hashing.js';
import {DEFAULT_SUMMARY_MODEL} from './constants.js';
import type {GenkitRunner} from '../codegen/genkit/genkit-runner.js';
import {getRunnerByName} from '../codegen/runner-creation.js';

interface CategoryConfig {
name: string;
Expand Down Expand Up @@ -73,6 +76,17 @@ export class Environment {
/** Ratings configured at the environment level. */
private readonly ratings: Rating[];

/** User-configured function used to augment prompts. */
private readonly augmentExecutablePrompt:
| ((context: PromptAugmentationContext) => Promise<string>)
| null;

/** Runner that user can use to access an LLM to augment prompts. */
private augmentationRunner: GenkitRunner | null = null;

/** User-provided callback for augmenting the LLM-generated files. */
private readonly augmentFileCallback: ((file: LlmResponseFile) => string) | null;

constructor(
rootPath: string,
private readonly config: EnvironmentConfig & Required<Pick<EnvironmentConfig, 'executor'>>,
Expand Down Expand Up @@ -103,26 +117,28 @@ export class Environment {
this.ratings = this.resolveRatings(config);
this.ratingHash = this.getRatingHash(this.ratings, this.ratingCategories);
this.analysisPrompts = this.resolveAnalysisPrompts(config);
this.augmentExecutablePrompt = config.augmentExecutablePrompt || null;
this.augmentFileCallback = config.augmentGeneratedFile || null;
this.validateRatingHash(this.ratingHash, config);
}

/** Prompts that should be executed as a part of the evaluation. */
executablePrompts = lazy(async () => {
readonly executablePrompts = lazy(async () => {
return this.resolveExecutablePrompts(this.config.executablePrompts);
});

systemPromptGeneration = lazy(async () => {
readonly systemPromptGeneration = lazy(async () => {
return (await this.renderSystemPrompt(this.config.generationSystemPrompt)).result;
});

systemPromptRepair = lazy(async () => {
readonly systemPromptRepair = lazy(async () => {
if (!this.config.repairSystemPrompt) {
return 'Please fix the given errors and return the corrected code.';
}
return (await this.renderSystemPrompt(this.config.repairSystemPrompt)).result;
});

systemPromptEditing = lazy(async () => {
readonly systemPromptEditing = lazy(async () => {
if (!this.config.editingSystemPrompt) {
return this.systemPromptGeneration();
}
Expand Down Expand Up @@ -180,6 +196,21 @@ export class Environment {
});
}

/**
 * Applies the user-configured file augmentation callback to LLM-generated files.
 *
 * The files are updated in place: the `code` of each one is overwritten with
 * the string returned by the callback. Does nothing if no callback is configured.
 *
 * @param files Files from the LLM response that should be augmented.
 */
augmentResponseFiles(files: LlmResponseFile[]): void {
  if (!this.augmentFileCallback) {
    return;
  }

  for (const file of files) {
    file.code = this.augmentFileCallback(file);
  }
}

/**
 * Releases the resources held by the environment: the executor and, if one
 * was created, the runner used for prompt augmentation.
 *
 * The runner is disposed even when destroying the executor fails. If both
 * steps fail, the error from disposing the runner is the one that propagates.
 */
async destroy(): Promise<void> {
  try {
    await this.executor.destroy();
  } finally {
    // Runs even if the executor failed to shut down, otherwise the runner would leak.
    const runner = this.augmentationRunner;

    if (runner) {
      // Drop the reference first so that a repeated `destroy` call doesn't dispose it twice.
      this.augmentationRunner = null;
      await runner.dispose();
    }
  }
}

/**
* Gets the readable display name of a framework, based on its ID.
* @param id ID to be resolved.
Expand Down Expand Up @@ -209,16 +240,16 @@ export class Environment {
* @param definitions Executable prompt definitions from the environment config.
*/
private async resolveExecutablePrompts(
prompts: EnvironmentConfig['executablePrompts'],
definitions: EnvironmentConfig['executablePrompts'],
): Promise<RootPromptDefinition[]> {
const result: Promise<RootPromptDefinition>[] = [];
const promptPromises: Promise<RootPromptDefinition>[] = [];
const envRatings = this.ratings;

for (const def of prompts) {
for (const def of definitions) {
if (def instanceof MultiStepPrompt) {
result.push(this.getMultiStepPrompt(def, envRatings));
promptPromises.push(this.getMultiStepPrompt(def, envRatings));
} else if (def instanceof EvalPromptWithMetadata) {
result.push(
promptPromises.push(
Promise.resolve({
name: def.name,
kind: 'single',
Expand All @@ -243,10 +274,10 @@ export class Environment {
name = def.name;
}

result.push(
promptPromises.push(
...globSync(path, {cwd: this.rootPath}).map(
async relativePath =>
await this.getStepPromptDefinition(
await this.getSinglePromptDefinition(
name ?? basename(relativePath, extname(relativePath)),
relativePath,
ratings,
Expand All @@ -258,19 +289,47 @@ export class Environment {
}
}

return Promise.all(result);
const prompts = await Promise.all(promptPromises);

if (this.augmentExecutablePrompt) {
const augmentationPromises: Promise<unknown>[] = [];
const updatePrompt = (promptDef: PromptDefinition) => {
augmentationPromises.push(
this.augmentExecutablePrompt!({
promptDef,
environment: this,
runner: this.augmentationRunner!,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quick question: would we have access to the underlying executor as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the environment exposes the executor.

}).then(text => (promptDef.prompt = text)),
);
};
this.augmentationRunner ??= await getRunnerByName('genkit');

for (const rootPrompt of prompts) {
if (rootPrompt.kind === 'multi-step') {
for (const promptDef of rootPrompt.steps) {
updatePrompt(promptDef);
}
} else {
updatePrompt(rootPrompt);
}
}

await Promise.all(augmentationPromises);
}

return prompts;
}

/**
* Creates a prompt definition for a given step.
* Creates a prompt definition for a single prompt.
*
* @param name Name of the prompt.
* @param rootPath Root path of the project.
* @param relativePath Relative path to the prompt.
* @param ratings Ratings to run against the definition.
* @param isEditing Whether this is an editing or generation step.
*/
private async getStepPromptDefinition<Metadata>(
private async getSinglePromptDefinition<Metadata>(
name: string,
relativePath: string,
ratings: Rating[],
Expand Down Expand Up @@ -345,11 +404,11 @@ export class Environment {
if (stepNum === 0) {
throw new UserFacingError('Multi-step prompts start with `step-1`.');
}
const step = await this.getStepPromptDefinition(
const step = await this.getSinglePromptDefinition(
`${name}-step-${stepNum}`,
join(def.directoryPath, current.name),
ratings,
/*isEditing */ stepNum !== 1,
/* isEditing */ stepNum !== 1,
stepMetadata,
);

Expand Down
1 change: 1 addition & 0 deletions runner/orchestration/codegen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ export async function repairCodeWithAI(
);

if (response.success) {
env.augmentResponseFiles(response.outputFiles);
progress.log(
promptDef,
'codegen',
Expand Down
3 changes: 2 additions & 1 deletion runner/orchestration/generate-initial-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ export async function generateInitialFiles(
);

if (response.success) {
env.augmentResponseFiles(response.outputFiles);
progress.log(
promptDef,
'codegen',
Expand All @@ -90,7 +91,7 @@ export async function generateInitialFiles(
}

return {
files: response.outputFiles!,
files: response.outputFiles,
usage: response.usage,
reasoning: response.reasoning,
toolLogs: response.toolLogs,
Expand Down
18 changes: 13 additions & 5 deletions runner/orchestration/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,24 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
const cleanup = async () => {
// Clean-up should never interrupt a potentially passing completion.
try {
await env.executor.destroy();
for (const cleanupFn of extraCleanupFns) {
await cleanupFn();
}
await env.destroy();
} catch (e) {
console.error(`Failed to destroy executor: ${e}`);
console.error(`Failed to destroy environment: ${e}`);
if (e instanceof Error) {
console.error(e.stack);
}
}

for (const cleanupFn of extraCleanupFns) {
try {
await cleanupFn();
} catch (e) {
console.error(`Failed cleanup: ${e}`);
if (e instanceof Error) {
console.error(e.stack);
}
}
}
};

// Ensure cleanup logic runs when the evaluation is aborted.
Expand Down
2 changes: 1 addition & 1 deletion runner/orchestration/repair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async function handleRepairResponse(
const newAttemptFiles = previousAttemptFiles.map(f => ({...f}));

mergeRepairFiles(repairResponse.outputFiles, newAttemptFiles);
writeResponseFiles(directory, newAttemptFiles, env, rootPromptDef.name);
await writeResponseFiles(directory, newAttemptFiles, env, rootPromptDef.name);

const buildResult = await runBuild(
evalID,
Expand Down
Loading