Skip to content

Commit 67198fa

Browse files
charleslien and jahooma authored
SDK & Claude Code evals (#246)
Co-authored-by: James Grugett <jahooma@gmail.com>
1 parent 0fa4aa4 commit 67198fa

File tree

41 files changed

+609
-296
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+609
-296
lines changed

.github/workflows/npm-app-release-prod.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ jobs:
9696
new-version: ${{ needs.prepare-and-commit-prod.outputs.new_version }}
9797
artifact-name: updated-package
9898
checkout-ref: ${{ github.sha }}
99-
env-overrides: '{"NEXT_PUBLIC_BACKEND_URL": "manicode-backend.onrender.com", "NEXT_PUBLIC_CB_ENVIRONMENT": "prod"}'
99+
env-overrides: '{"NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "manicode-backend.onrender.com", "NEXT_PUBLIC_CB_ENVIRONMENT": "prod"}'
100100
secrets: inherit
101101

102102
# Create GitHub release with all binaries

.github/workflows/npm-app-release-staging.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ jobs:
134134
new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }}
135135
artifact-name: updated-staging-package
136136
checkout-ref: ${{ github.event.pull_request.head.sha }}
137-
env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
137+
env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
138138
secrets: inherit
139139

140140
# Create GitHub prerelease with all binaries

common/src/util/referral.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
export const getReferralLink = (referralCode: string): string =>
2-
`${process.env.NEXT_PUBLIC_APP_URL}/referrals/${referralCode}`
2+
`${process.env.NEXT_PUBLIC_CODEBUFF_APP_URL}/referrals/${referralCode}`

evals/bun.lock

Lines changed: 0 additions & 34 deletions
This file was deleted.

evals/git-evals/judge-git-eval.ts

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -38,22 +38,7 @@ function buildAnalysisPrompt(
3838
.join('\n\n---\n\n')
3939

4040
// Build Codebuff changes section
41-
const codebuffChanges = evalRun.fileStates
42-
.map((state) => {
43-
const diff = createPatch(state.path, state.preContent, state.postContent)
44-
let content = `File: ${state.path}\n\nUnified Diff (Codebuff's Changes):\n${diff}`
45-
46-
if (includeBeforeContent) {
47-
content += `\n\nPre-commit content:\n${state.preContent}`
48-
}
49-
50-
if (includeAfterContent) {
51-
content += `\n\nPost-commit content (Codebuff's Attempt):\n${state.postContent}`
52-
}
53-
54-
return content
55-
})
56-
.join('\n\n---\n\n')
41+
const codebuffChanges = evalRun.gitDiff
5742

5843
// Build trace section
5944
const traceContent =

evals/git-evals/post-eval-analysis.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ Judge Analysis: ${judging.analysis}
8282
Strengths: ${judging.strengths.join('; ')}
8383
Weaknesses: ${judging.weaknesses.join('; ')}
8484
85-
Files Changed by Codebuff: ${run.fileStates.map((f) => f.path).join(', ') || 'None'}
85+
Files Changed by Codebuff: ${run.gitDiff || 'None'}
8686
Ground Truth Files: ${run.eval_commit.fileStates.map((f) => f.path).join(', ')}
8787
8888
Trace Summary: ${run.trace.length} conversation turns

evals/git-evals/run-eval-set.ts

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,10 @@ class RunEvalSetCommand extends Command {
6565
description: 'Number of concurrent evals to run',
6666
min: 1,
6767
}),
68+
'coding-agent': Flags.string({
69+
description: 'Coding agent to use',
70+
default: 'codebuff',
71+
}),
6872
help: Flags.help({ char: 'h' }),
6973
}
7074

@@ -83,6 +87,7 @@ async function runEvalSet(options: {
8387
insert: boolean
8488
title?: string
8589
concurrency?: number
90+
'coding-agent': string
8691
}): Promise<void> {
8792
const {
8893
'output-dir': outputDir,
@@ -91,8 +96,14 @@ async function runEvalSet(options: {
9196
mock: mockEval,
9297
insert: shouldInsert,
9398
title,
99+
'coding-agent': codingAgentstr,
94100
} = options
95101

102+
if (!['codebuff', 'claude'].includes(codingAgentstr)) {
103+
throw new Error(`Invalid coding agent: ${codingAgentstr}`)
104+
}
105+
const codingAgent = codingAgentstr as 'codebuff' | 'claude'
106+
96107
console.log('Starting eval set run...')
97108
console.log(`Output directory: ${outputDir}`)
98109

@@ -148,7 +159,7 @@ async function runEvalSet(options: {
148159
: await runGitEvals(
149160
config.evalDataPath,
150161
config.outputDir,
151-
config.agentType,
162+
codingAgent,
152163
config.limit,
153164
options.concurrency === 1,
154165
)

0 commit comments

Comments
 (0)