diff --git a/.changeset/sampling-improvements.md b/.changeset/sampling-improvements.md
new file mode 100644
index 000000000..304caaf40
--- /dev/null
+++ b/.changeset/sampling-improvements.md
@@ -0,0 +1,5 @@
+---
+"@hyperdx/app": patch
+---
+
+feat: deterministic sampling for Event Deltas, plus an adaptive sample-size helper (`computeEffectiveSampleSize`)
diff --git a/packages/app/src/components/DBDeltaChart.tsx b/packages/app/src/components/DBDeltaChart.tsx
index fd5bd68de..d38d44ff9 100644
--- a/packages/app/src/components/DBDeltaChart.tsx
+++ b/packages/app/src/components/DBDeltaChart.tsx
@@ -23,9 +23,11 @@ import { getFirstTimestampValueExpression } from '@/source';
import { SQLPreview } from './ChartSQLPreview';
import {
getPropertyStatistics,
+ getStableSampleExpression,
isDenylisted,
isHighCardinality,
mergeValueStatisticsMaps,
+ SAMPLE_SIZE,
} from './deltaChartUtils';
import {
CHART_GAP,
@@ -42,6 +44,7 @@ export default function DBDeltaChart({
xMax,
yMin,
yMax,
+ spanIdExpression,
}: {
config: ChartConfigWithDateRange;
valueExpr: string;
@@ -49,10 +52,14 @@ export default function DBDeltaChart({
xMax: number;
yMin: number;
yMax: number;
+ spanIdExpression?: string;
}) {
// Determine if the value expression uses aggregate functions
const isAggregate = isAggregateFunction(valueExpr);
+ // Build deterministic ORDER BY expression from source's spanIdExpression
+ const stableSampleExpr = getStableSampleExpression(spanIdExpression);
+
// Get the timestamp expression from config
const timestampExpr = getFirstTimestampValueExpression(
config.timestampValueExpression,
@@ -136,8 +143,8 @@ export default function DBDeltaChart({
]
: []),
],
- orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
- limit: { limit: 1000 },
+ orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }],
+ limit: { limit: SAMPLE_SIZE },
},
},
];
@@ -191,8 +198,8 @@ export default function DBDeltaChart({
with: buildWithClauses(true),
select: '*',
filters: buildFilters(true),
- orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
- limit: { limit: 1000 },
+ orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }],
+ limit: { limit: SAMPLE_SIZE },
});
const { data: inlierData } = useQueriedChartConfig({
@@ -200,8 +207,8 @@ export default function DBDeltaChart({
with: buildWithClauses(false),
select: '*',
filters: buildFilters(false),
- orderBy: [{ ordering: 'DESC', valueExpression: 'rand()' }],
- limit: { limit: 1000 },
+ orderBy: [{ ordering: 'DESC', valueExpression: stableSampleExpr }],
+ limit: { limit: SAMPLE_SIZE },
});
// Column metadata for field classification (from ClickHouse response)
diff --git a/packages/app/src/components/Search/DBSearchHeatmapChart.tsx b/packages/app/src/components/Search/DBSearchHeatmapChart.tsx
index 1b710ea1c..fea4425cd 100644
--- a/packages/app/src/components/Search/DBSearchHeatmapChart.tsx
+++ b/packages/app/src/components/Search/DBSearchHeatmapChart.tsx
@@ -119,6 +119,7 @@ export function DBSearchHeatmapChart({
xMax={fields.xMax}
yMin={fields.yMin}
yMax={fields.yMax}
+ spanIdExpression={source.spanIdExpression}
/>
) : (
diff --git a/packages/app/src/components/__tests__/deltaChartSampling.test.ts b/packages/app/src/components/__tests__/deltaChartSampling.test.ts
new file mode 100644
index 000000000..7403ec0eb
--- /dev/null
+++ b/packages/app/src/components/__tests__/deltaChartSampling.test.ts
@@ -0,0 +1,57 @@
+import {
+ computeEffectiveSampleSize,
+ getStableSampleExpression,
+ MAX_SAMPLE_SIZE,
+ MIN_SAMPLE_SIZE,
+ SAMPLE_RATIO,
+ SAMPLE_SIZE,
+} from '../deltaChartUtils';
+
+describe('getStableSampleExpression', () => {
+  // Shorthand: the ORDER BY expression built for a given spanIdExpression.
+  const build = (expr?: string) => getStableSampleExpression(expr);
+
+  it('returns cityHash64 of spanIdExpression when provided', () => {
+    expect(build('SpanId')).toBe('cityHash64(SpanId)');
+  });
+  it('uses custom spanId column name', () => {
+    expect(build('my_span_id')).toBe('cityHash64(my_span_id)');
+  });
+
+  // Without an expression the helper is expected to fall back to rand().
+  it('falls back to rand() when spanIdExpression is undefined', () => {
+    expect(build(undefined)).toBe('rand()');
+  });
+  it('falls back to rand() when spanIdExpression is empty', () => {
+    expect(build('')).toBe('rand()');
+  });
+});
+
+describe('computeEffectiveSampleSize', () => {
+  // Non-positive counts are treated as "unknown" and yield SAMPLE_SIZE.
+  it('returns SAMPLE_SIZE when totalCount is 0 (fallback)', () => {
+    expect(computeEffectiveSampleSize(0)).toBe(SAMPLE_SIZE);
+  });
+  it('returns SAMPLE_SIZE when totalCount is negative', () => {
+    expect(computeEffectiveSampleSize(-1)).toBe(SAMPLE_SIZE);
+  });
+
+  // Positive counts are scaled by SAMPLE_RATIO and clamped to [MIN, MAX].
+  it('returns MIN_SAMPLE_SIZE for small datasets', () => {
+    expect(computeEffectiveSampleSize(100)).toBe(MIN_SAMPLE_SIZE);
+  });
+  it('returns SAMPLE_RATIO * totalCount for mid-size datasets', () => {
+    const totalCount = 200_000;
+    const sampleSize = computeEffectiveSampleSize(totalCount);
+    expect(sampleSize).toBe(Math.ceil(totalCount * SAMPLE_RATIO));
+    expect(sampleSize).toBeGreaterThan(MIN_SAMPLE_SIZE);
+    expect(sampleSize).toBeLessThan(MAX_SAMPLE_SIZE);
+  });
+
+  it('caps at MAX_SAMPLE_SIZE for very large datasets', () => {
+    expect(computeEffectiveSampleSize(10_000_000)).toBe(MAX_SAMPLE_SIZE);
+  });
+  it('returns exact 1% for datasets where 1% falls in the valid range', () => {
+    expect(computeEffectiveSampleSize(100_000)).toBe(1000);
+  });
+});
diff --git a/packages/app/src/components/deltaChartUtils.ts b/packages/app/src/components/deltaChartUtils.ts
index cad5013bf..1fe08c793 100644
--- a/packages/app/src/components/deltaChartUtils.ts
+++ b/packages/app/src/components/deltaChartUtils.ts
@@ -283,3 +283,43 @@ export function isHighCardinality(
return effectiveUniqueness > 0.9;
}
+
+// ---------------------------------------------------------------------------
+// Sampling configuration
+// ---------------------------------------------------------------------------
+
+/** Default sample size; the fallback when the total row count is unknown */
+export const SAMPLE_SIZE = 1000;
+
+/** Lower bound on the sample size returned by computeEffectiveSampleSize */
+export const MIN_SAMPLE_SIZE = 500;
+
+/** Upper bound on the sample size returned by computeEffectiveSampleSize */
+export const MAX_SAMPLE_SIZE = 5000;
+
+/** Fraction of the total row count to sample before clamping (0.01 = 1%) */
+export const SAMPLE_RATIO = 0.01;
+
+/**
+ * Builds the ORDER BY expression used to pick sample rows.
+ * Returns `cityHash64(<spanIdExpression>)` so repeated queries select the same
+ * rows; falls back to `rand()` when the expression is missing or blank.
+ */
+export function getStableSampleExpression(spanIdExpression?: string): string {
+  const expr = spanIdExpression?.trim();
+  if (expr) return `cityHash64(${expr})`;
+  return 'rand()';
+}
+
+/**
+ * Computes the sample size for a given total row count:
+ * ceil(totalCount * SAMPLE_RATIO) clamped to [MIN_SAMPLE_SIZE, MAX_SAMPLE_SIZE].
+ * Returns SAMPLE_SIZE when the count is zero, negative or NaN (unknown).
+ */
+export function computeEffectiveSampleSize(totalCount: number): number {
+  // `!(x > 0)` rather than `x <= 0`, so a NaN count also takes the fallback
+  // instead of flowing through Math.min/Math.max and coming back as NaN.
+  if (!(totalCount > 0)) return SAMPLE_SIZE;
+  const scaled = Math.ceil(totalCount * SAMPLE_RATIO);
+  return Math.min(MAX_SAMPLE_SIZE, Math.max(MIN_SAMPLE_SIZE, scaled));
+}