-
Notifications
You must be signed in to change notification settings - Fork 0
perf: benchmarks #35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
perf: benchmarks #35
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
# Benchmark workflow: measures SDK overhead under a realistic workload and
# under fixed-QPS load, publishes structured JSON + markdown summaries as
# artifacts, and optionally compares against a previous run's artifacts.
name: Benchmarks

on:
  workflow_dispatch:
    inputs:
      iterations:
        description: "Number of iterations for realistic workload benchmark"
        required: false
        default: "200"
      qps_duration:
        description: "Duration in seconds for each QPS level"
        required: false
        default: "10"
      compare_with:
        description: "Run ID to compare results against (optional)"
        required: false
        default: ""

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Setup Python
        run: uv python install 3.9

      - name: Cache uv + Python installs + venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            ~/.local/share/uv/python
            .venv
          key: ${{ runner.os }}-uv-benchmark-3.9-${{ hashFiles('uv.lock') }}
          # Without restore-keys a lockfile change throws away the whole
          # cache; a stale cache is still a useful starting point for uv.
          restore-keys: |
            ${{ runner.os }}-uv-benchmark-3.9-

      - name: Install dependencies
        run: |
          uv sync --all-extras
          uv pip install flask requests psutil

      - name: Get system info
        id: sysinfo
        run: |
          # Use `uv run` so the reported interpreter is the venv's 3.9,
          # not the runner's system Python.
          echo "python_version=$(uv run python --version)" >> $GITHUB_OUTPUT
          echo "os=$(uname -s)" >> $GITHUB_OUTPUT
          echo "arch=$(uname -m)" >> $GITHUB_OUTPUT
          echo "cpu_count=$(nproc)" >> $GITHUB_OUTPUT
          echo "memory_gb=$(free -g | awk '/^Mem:/{print $2}')" >> $GITHUB_OUTPUT

      - name: Run realistic workload benchmark
        id: realistic
        env:
          BENCHMARK_ITERATIONS: ${{ inputs.iterations }}
        run: |
          uv run python benchmarks/bench/realistic_workload.py 2>&1 | tee realistic_output.txt
          # Echo the results JSON into the log for quick inspection.
          cat benchmarks/results/realistic-workload.json

      - name: Run fixed QPS latency benchmark
        id: fixed_qps
        env:
          BENCHMARK_QPS_DURATION: ${{ inputs.qps_duration }}
        run: |
          uv run python benchmarks/bench/fixed_qps_latency.py 2>&1 | tee fixed_qps_output.txt
          # Echo the results JSON into the log for quick inspection.
          cat benchmarks/results/fixed-qps-latency.json

      - name: Generate structured results
        id: results
        run: |
          # Merge both benchmark outputs with run metadata into a single
          # summary document. jq builds the JSON so values are properly
          # quoted/typed (a shell heredoc would not expand $(...) safely).
          jq -n \
            --slurpfile realistic benchmarks/results/realistic-workload.json \
            --slurpfile fixed_qps benchmarks/results/fixed-qps-latency.json \
            --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            --arg run_id "${{ github.run_id }}" \
            --arg run_number "${{ github.run_number }}" \
            --arg commit_sha "${{ github.sha }}" \
            --arg branch "${{ github.ref_name }}" \
            --arg triggered_by "${{ github.actor }}" \
            --arg python_version "${{ steps.sysinfo.outputs.python_version }}" \
            --arg os "${{ steps.sysinfo.outputs.os }}" \
            --arg arch "${{ steps.sysinfo.outputs.arch }}" \
            --arg cpu_count "${{ steps.sysinfo.outputs.cpu_count }}" \
            --arg memory_gb "${{ steps.sysinfo.outputs.memory_gb }}" \
            '{
              metadata: {
                timestamp: $timestamp,
                run_id: $run_id,
                run_number: ($run_number | tonumber),
                commit_sha: $commit_sha,
                branch: $branch,
                triggered_by: $triggered_by,
                environment: {
                  python_version: $python_version,
                  os: $os,
                  arch: $arch,
                  cpu_count: ($cpu_count | tonumber),
                  memory_gb: ($memory_gb | tonumber)
                }
              },
              realistic_workload: $realistic[0],
              fixed_qps_latency: $fixed_qps[0]
            }' > benchmarks/results/benchmark-summary.json

      - name: Generate markdown summary
        run: |
          SUMMARY_FILE="benchmarks/results/benchmark-summary.md"

          # Unquoted EOF: we want $(date ...) and the ${{ }} expansions
          # below to be substituted into the summary header.
          cat > "$SUMMARY_FILE" << EOF
          # Benchmark Results

          **Date**: $(date -u +%Y-%m-%d)
          **Commit**: ${{ github.sha }}
          **Branch**: ${{ github.ref_name }}
          **Run ID**: ${{ github.run_id }}

          ## Environment
          - Python: ${{ steps.sysinfo.outputs.python_version }}
          - OS: ${{ steps.sysinfo.outputs.os }} (${{ steps.sysinfo.outputs.arch }})
          - CPUs: ${{ steps.sysinfo.outputs.cpu_count }}
          - Memory: ${{ steps.sysinfo.outputs.memory_gb }} GB

          ## Realistic Workload Results

          EOF

          # Render the realistic-workload comparison as a markdown table
          # (values rounded to 1 decimal place via `. * 10 | round / 10`).
          jq -r '
            "| Endpoint | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|----------|----------|------------|----------|-----------|----------|",
            (.comparison_100 | to_entries[] |
              "| \(.key) | \(.value.baseline_mean_ms | . * 10 | round / 10)ms | \(.value.sdk_mean_ms | . * 10 | round / 10)ms | +\(.value.mean_overhead_ms | . * 10 | round / 10)ms (\(.value.mean_overhead_pct | round)%) | - | - |"
            )
          ' benchmarks/results/realistic-workload.json >> "$SUMMARY_FILE"

          # Quoted EOF: static text, no expansion needed.
          cat >> "$SUMMARY_FILE" << 'EOF'

          ## Fixed QPS Latency Results

          ### Mean Latency

          EOF

          jq -r '
            "| QPS | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|-----|----------|------------|----------|-----------|----------|",
            (.baseline | to_entries[] |
              . as $b |
              ($b.key | tostring) as $qps |
              "| \($qps) | \($b.value.mean_ms | . * 10 | round / 10)ms | - | - | - | - |"
            )
          ' benchmarks/results/fixed-qps-latency.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'

          ---

          📊 **Full results available in artifacts**

          EOF

          # Also write to GitHub step summary for UI display.
          cat "$SUMMARY_FILE" >> $GITHUB_STEP_SUMMARY

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: |
            benchmarks/results/*.json
            benchmarks/results/*.md
            realistic_output.txt
            fixed_qps_output.txt
          retention-days: 90

      - name: Download comparison results (if specified)
        if: ${{ inputs.compare_with != '' }}
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ inputs.compare_with }}
          path: benchmarks/results/comparison/
          # download-artifact@v4 only searches the *current* run unless
          # run-id + github-token are provided; compare_with is a run ID
          # from a previous workflow run.
          run-id: ${{ inputs.compare_with }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
        # Artifact may have expired (90-day retention); the next step
        # handles the missing-file case explicitly.
        continue-on-error: true

      - name: Compare with previous run
        if: ${{ inputs.compare_with != '' }}
        run: |
          if [ -f benchmarks/results/comparison/benchmark-summary.json ]; then
            echo "## Comparison with Run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY

            # Compare realistic workload results
            PREV_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            PREV_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            echo "| Metric | Previous | Current | Delta |" >> $GITHUB_STEP_SUMMARY
            echo "|--------|----------|---------|-------|" >> $GITHUB_STEP_SUMMARY
            echo "| Read API overhead | ${PREV_READ}ms | ${CURR_READ}ms | $(echo "$CURR_READ - $PREV_READ" | bc)ms |" >> $GITHUB_STEP_SUMMARY
            echo "| Write API overhead | ${PREV_WRITE}ms | ${CURR_WRITE}ms | $(echo "$CURR_WRITE - $PREV_WRITE" | bc)ms |" >> $GITHUB_STEP_SUMMARY
          else
            echo "⚠️ Could not find comparison results for run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
          fi

      - name: Check for performance regression
        id: regression
        run: |
          # Informational only: flags regressions in the step summary and
          # the `regression` output, but does not fail the job.
          # Threshold: 3ms overhead at 100% sampling.
          THRESHOLD_MS=3.0

          READ_OVERHEAD=$(jq '.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          WRITE_OVERHEAD=$(jq '.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          MIXED_OVERHEAD=$(jq '.comparison_100.realistic_mixed.mean_overhead_ms' benchmarks/results/realistic-workload.json)

          REGRESSION=false

          # bc -l handles the float comparison; shell (( )) is integer-only.
          if (( $(echo "$READ_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Read API overhead ($READ_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if (( $(echo "$WRITE_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Write API overhead ($WRITE_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if (( $(echo "$MIXED_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Mixed API overhead ($MIXED_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if [ "$REGRESSION" = true ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ⚠️ Performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=true" >> $GITHUB_OUTPUT
          else
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ✅ No performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=false" >> $GITHUB_OUTPUT
          fi

      - name: Output JSON results
        run: |
          echo "### Structured Results (JSON)"
          echo ""
          echo '```json'
          cat benchmarks/results/benchmark-summary.json
          echo '```'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
# Benchmark results (regenerated on each run; never committed)
results/

# Trace directories created during benchmark runs
.benchmark-traces*/
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.