Skip to content

Commit 2123bae

Browse files
pablogsalfatelei
authored andcommitted
gh-142374: Fix recursive function cumulative over-counting in sampling profiler (#142378)
1 parent c8d846f commit 2123bae

File tree

9 files changed

+454
-55
lines changed

9 files changed

+454
-55
lines changed

Lib/profiling/sampling/heatmap_collector.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,10 @@ def __init__(self, *args, **kwargs):
491491
# File index (populated during export)
492492
self.file_index = {}
493493

494+
# Reusable set for deduplicating line locations within a single sample.
495+
# This avoids over-counting recursive functions in cumulative stats.
496+
self._seen_lines = set()
497+
494498
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
495499
"""Set profiling statistics to include in heatmap output.
496500
@@ -524,6 +528,7 @@ def process_frames(self, frames, thread_id):
524528
thread_id: Thread ID for this stack trace
525529
"""
526530
self._total_samples += 1
531+
self._seen_lines.clear()
527532

528533
for i, (filename, location, funcname, opcode) in enumerate(frames):
529534
# Normalize location to 4-tuple format
@@ -533,7 +538,14 @@ def process_frames(self, frames, thread_id):
533538
continue
534539

535540
# frames[0] is the leaf - where execution is actually happening
536-
self._record_line_sample(filename, lineno, funcname, is_leaf=(i == 0))
541+
is_leaf = (i == 0)
542+
line_key = (filename, lineno)
543+
count_cumulative = line_key not in self._seen_lines
544+
if count_cumulative:
545+
self._seen_lines.add(line_key)
546+
547+
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
548+
count_cumulative=count_cumulative)
537549

538550
if opcode is not None:
539551
# Set opcodes_enabled flag when we first encounter opcode data
@@ -562,11 +574,13 @@ def _is_valid_frame(self, filename, lineno):
562574

563575
return True
564576

565-
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False):
577+
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
578+
count_cumulative=True):
566579
"""Record a sample for a specific line."""
567580
# Track cumulative samples (all occurrences in stack)
568-
self.line_samples[(filename, lineno)] += 1
569-
self.file_samples[filename][lineno] += 1
581+
if count_cumulative:
582+
self.line_samples[(filename, lineno)] += 1
583+
self.file_samples[filename][lineno] += 1
570584

571585
# Track self/leaf samples (only when at top of stack)
572586
if is_leaf:

Lib/profiling/sampling/live_collector/collector.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ def __init__(
210210
# Trend tracking (initialized after colors are set up)
211211
self._trend_tracker = None
212212

213+
self._seen_locations = set()
214+
213215
@property
214216
def elapsed_time(self):
215217
"""Get the elapsed time, frozen when finished."""
@@ -305,15 +307,18 @@ def process_frames(self, frames, thread_id=None):
305307

306308
# Get per-thread data if tracking per-thread
307309
thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None
310+
self._seen_locations.clear()
308311

309312
# Process each frame in the stack to track cumulative calls
310313
# frame.location is (lineno, end_lineno, col_offset, end_col_offset), int, or None
311314
for frame in frames:
312315
lineno = extract_lineno(frame.location)
313316
location = (frame.filename, lineno, frame.funcname)
314-
self.result[location]["cumulative_calls"] += 1
315-
if thread_data:
316-
thread_data.result[location]["cumulative_calls"] += 1
317+
if location not in self._seen_locations:
318+
self._seen_locations.add(location)
319+
self.result[location]["cumulative_calls"] += 1
320+
if thread_data:
321+
thread_data.result[location]["cumulative_calls"] += 1
317322

318323
# The top frame gets counted as an inline call (directly executing)
319324
top_frame = frames[0]
@@ -371,11 +376,13 @@ def collect(self, stack_frames):
371376
thread_data.gc_frame_samples += stats["gc_samples"]
372377

373378
# Process frames using pre-selected iterator
379+
frames_processed = False
374380
for frames, thread_id in self._get_frame_iterator(stack_frames):
375381
if not frames:
376382
continue
377383

378384
self.process_frames(frames, thread_id=thread_id)
385+
frames_processed = True
379386

380387
# Track thread IDs
381388
if thread_id is not None and thread_id not in self.thread_ids:
@@ -388,7 +395,11 @@ def collect(self, stack_frames):
388395
if has_gc_frame:
389396
self.gc_frame_samples += 1
390397

391-
self.successful_samples += 1
398+
# Only count as successful if we actually processed frames
399+
# This is important for modes like --mode exception where most samples
400+
# may be filtered out at the C level
401+
if frames_processed:
402+
self.successful_samples += 1
392403
self.total_samples += 1
393404

394405
# Handle input on every sample for instant responsiveness
@@ -659,9 +670,11 @@ def build_stats_list(self):
659670
total_time = direct_calls * self.sample_interval_sec
660671
cumulative_time = cumulative_calls * self.sample_interval_sec
661672

662-
# Calculate sample percentages
663-
sample_pct = (direct_calls / self.total_samples * 100) if self.total_samples > 0 else 0
664-
cumul_pct = (cumulative_calls / self.total_samples * 100) if self.total_samples > 0 else 0
673+
# Calculate sample percentages using successful_samples as denominator
674+
# This ensures percentages are relative to samples that actually had data,
675+
# not all sampling attempts (important for filtered modes like --mode exception)
676+
sample_pct = (direct_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
677+
cumul_pct = (cumulative_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
665678

666679
# Calculate trends for all columns using TrendTracker
667680
trends = {}
@@ -684,7 +697,9 @@ def build_stats_list(self):
684697
"cumulative_calls": cumulative_calls,
685698
"total_time": total_time,
686699
"cumulative_time": cumulative_time,
687-
"trends": trends, # Dictionary of trends for all columns
700+
"sample_pct": sample_pct,
701+
"cumul_pct": cumul_pct,
702+
"trends": trends,
688703
}
689704
)
690705

@@ -696,21 +711,9 @@ def build_stats_list(self):
696711
elif self.sort_by == "cumtime":
697712
stats_list.sort(key=lambda x: x["cumulative_time"], reverse=True)
698713
elif self.sort_by == "sample_pct":
699-
stats_list.sort(
700-
key=lambda x: (x["direct_calls"] / self.total_samples * 100)
701-
if self.total_samples > 0
702-
else 0,
703-
reverse=True,
704-
)
714+
stats_list.sort(key=lambda x: x["sample_pct"], reverse=True)
705715
elif self.sort_by == "cumul_pct":
706-
stats_list.sort(
707-
key=lambda x: (
708-
x["cumulative_calls"] / self.total_samples * 100
709-
)
710-
if self.total_samples > 0
711-
else 0,
712-
reverse=True,
713-
)
716+
stats_list.sort(key=lambda x: x["cumul_pct"], reverse=True)
714717

715718
return stats_list
716719

Lib/profiling/sampling/live_collector/widgets.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ def draw_thread_status(self, line, width):
396396
total_samples = max(1, thread_data.sample_count)
397397
pct_gc = (thread_data.gc_frame_samples / total_samples) * 100
398398
else:
399+
# Use total_samples for GC percentage since gc_frame_samples is tracked
400+
# across ALL samples (via thread status), not just successful ones
399401
total_samples = max(1, self.collector.total_samples)
400402
pct_gc = (self.collector.gc_frame_samples / total_samples) * 100
401403

@@ -529,10 +531,7 @@ def draw_top_functions(self, line, width, stats_list):
529531
continue
530532

531533
func_name = func_data["func"][2]
532-
func_pct = (
533-
func_data["direct_calls"]
534-
/ max(1, self.collector.total_samples)
535-
) * 100
534+
func_pct = func_data["sample_pct"]
536535

537536
# Medal emoji
538537
if col + 3 < width - 15:
@@ -765,19 +764,10 @@ def draw_stats_rows(self, line, height, width, stats_list, column_flags):
765764
cumulative_calls = stat["cumulative_calls"]
766765
total_time = stat["total_time"]
767766
cumulative_time = stat["cumulative_time"]
767+
sample_pct = stat["sample_pct"]
768+
cum_pct = stat["cumul_pct"]
768769
trends = stat.get("trends", {})
769770

770-
sample_pct = (
771-
(direct_calls / self.collector.total_samples * 100)
772-
if self.collector.total_samples > 0
773-
else 0
774-
)
775-
cum_pct = (
776-
(cumulative_calls / self.collector.total_samples * 100)
777-
if self.collector.total_samples > 0
778-
else 0
779-
)
780-
781771
# Check if this row is selected
782772
is_selected = show_opcodes and row_idx == selected_row
783773

Lib/profiling/sampling/pstats_collector.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,23 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
1616
lambda: collections.defaultdict(int)
1717
)
1818
self.skip_idle = skip_idle
19+
self._seen_locations = set()
1920

2021
def _process_frames(self, frames):
2122
"""Process a single thread's frame stack."""
2223
if not frames:
2324
return
2425

26+
self._seen_locations.clear()
27+
2528
# Process each frame in the stack to track cumulative calls
2629
# frame.location is int, tuple (lineno, end_lineno, col_offset, end_col_offset), or None
2730
for frame in frames:
2831
lineno = extract_lineno(frame.location)
29-
loc = (frame.filename, lineno, frame.funcname)
30-
self.result[loc]["cumulative_calls"] += 1
32+
location = (frame.filename, lineno, frame.funcname)
33+
if location not in self._seen_locations:
34+
self._seen_locations.add(location)
35+
self.result[location]["cumulative_calls"] += 1
3136

3237
# The top frame gets counted as an inline call (directly executing)
3338
top_lineno = extract_lineno(frames[0].location)

0 commit comments

Comments
 (0)