python
diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py
Lines changed: 18 additions & 4 deletions
diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py
Lines changed: 25 additions & 22 deletions
diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py
Lines changed: 5 additions & 15 deletions
diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py
Lines changed: 7 additions & 2 deletions
@@ -491,6 +491,10 @@ def __init__(self, *args, **kwargs):
         # File index (populated during export)
         self.file_index = {}
 
+        # Reusable set for deduplicating line locations within a single sample.
+        # This avoids over-counting recursive functions in cumulative stats.
+        self._seen_lines = set()
+
     def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
         """Set profiling statistics to include in heatmap output.
 
@@ -524,6 +528,7 @@ def process_frames(self, frames, thread_id):
             thread_id: Thread ID for this stack trace
         """
         self._total_samples += 1
+        self._seen_lines.clear()
 
         for i, (filename, location, funcname, opcode) in enumerate(frames):
             # Normalize location to 4-tuple format
@@ -533,7 +538,14 @@ def process_frames(self, frames, thread_id):
                 continue
 
             # frames[0] is the leaf - where execution is actually happening
-            self._record_line_sample(filename, lineno, funcname, is_leaf=(i == 0))
+            is_leaf = (i == 0)
+            line_key = (filename, lineno)
+            count_cumulative = line_key not in self._seen_lines
+            if count_cumulative:
+                self._seen_lines.add(line_key)
+
+            self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
+                                     count_cumulative=count_cumulative)
 
             if opcode is not None:
                 # Set opcodes_enabled flag when we first encounter opcode data
@@ -562,11 +574,13 @@ def _is_valid_frame(self, filename, lineno):
 
         return True
 
-    def _record_line_sample(self, filename, lineno, funcname, is_leaf=False):
+    def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
+                            count_cumulative=True):
         """Record a sample for a specific line."""
         # Track cumulative samples (all occurrences in stack)
-        self.line_samples[(filename, lineno)] += 1
-        self.file_samples[filename][lineno] += 1
+        if count_cumulative:
+            self.line_samples[(filename, lineno)] += 1
+            self.file_samples[filename][lineno] += 1
 
         # Track self/leaf samples (only when at top of stack)
         if is_leaf:
 
@@ -210,6 +210,8 @@ def __init__(
         # Trend tracking (initialized after colors are set up)
         self._trend_tracker = None
 
+        self._seen_locations = set()
+
     @property
     def elapsed_time(self):
         """Get the elapsed time, frozen when finished."""
@@ -305,15 +307,18 @@ def process_frames(self, frames, thread_id=None):
 
         # Get per-thread data if tracking per-thread
         thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None
+        self._seen_locations.clear()
 
         # Process each frame in the stack to track cumulative calls
         # frame.location is (lineno, end_lineno, col_offset, end_col_offset), int, or None
         for frame in frames:
             lineno = extract_lineno(frame.location)
             location = (frame.filename, lineno, frame.funcname)
-            self.result[location]["cumulative_calls"] += 1
-            if thread_data:
-                thread_data.result[location]["cumulative_calls"] += 1
+            if location not in self._seen_locations:
+                self._seen_locations.add(location)
+                self.result[location]["cumulative_calls"] += 1
+                if thread_data:
+                    thread_data.result[location]["cumulative_calls"] += 1
 
         # The top frame gets counted as an inline call (directly executing)
         top_frame = frames[0]
@@ -371,11 +376,13 @@ def collect(self, stack_frames):
                     thread_data.gc_frame_samples += stats["gc_samples"]
 
         # Process frames using pre-selected iterator
+        frames_processed = False
         for frames, thread_id in self._get_frame_iterator(stack_frames):
             if not frames:
                 continue
 
             self.process_frames(frames, thread_id=thread_id)
+            frames_processed = True
 
             # Track thread IDs
             if thread_id is not None and thread_id not in self.thread_ids:
@@ -388,7 +395,11 @@ def collect(self, stack_frames):
         if has_gc_frame:
             self.gc_frame_samples += 1
 
-        self.successful_samples += 1
+        # Only count as successful if we actually processed frames
+        # This is important for modes like --mode exception where most samples
+        # may be filtered out at the C level
+        if frames_processed:
+            self.successful_samples += 1
         self.total_samples += 1
 
         # Handle input on every sample for instant responsiveness
@@ -659,9 +670,11 @@ def build_stats_list(self):
             total_time = direct_calls * self.sample_interval_sec
             cumulative_time = cumulative_calls * self.sample_interval_sec
 
-            # Calculate sample percentages
-            sample_pct = (direct_calls / self.total_samples * 100) if self.total_samples > 0 else 0
-            cumul_pct = (cumulative_calls / self.total_samples * 100) if self.total_samples > 0 else 0
+            # Calculate sample percentages using successful_samples as denominator
+            # This ensures percentages are relative to samples that actually had data,
+            # not all sampling attempts (important for filtered modes like --mode exception)
+            sample_pct = (direct_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
+            cumul_pct = (cumulative_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
 
             # Calculate trends for all columns using TrendTracker
             trends = {}
@@ -684,7 +697,9 @@ def build_stats_list(self):
                     "cumulative_calls": cumulative_calls,
                     "total_time": total_time,
                     "cumulative_time": cumulative_time,
-                    "trends": trends,  # Dictionary of trends for all columns
+                    "sample_pct": sample_pct,
+                    "cumul_pct": cumul_pct,
+                    "trends": trends,
                 }
             )
 
@@ -696,21 +711,9 @@ def build_stats_list(self):
         elif self.sort_by == "cumtime":
             stats_list.sort(key=lambda x: x["cumulative_time"], reverse=True)
         elif self.sort_by == "sample_pct":
-            stats_list.sort(
-                key=lambda x: (x["direct_calls"] / self.total_samples * 100)
-                if self.total_samples > 0
-                else 0,
-                reverse=True,
-            )
+            stats_list.sort(key=lambda x: x["sample_pct"], reverse=True)
         elif self.sort_by == "cumul_pct":
-            stats_list.sort(
-                key=lambda x: (
-                    x["cumulative_calls"] / self.total_samples * 100
-                )
-                if self.total_samples > 0
-                else 0,
-                reverse=True,
-            )
+            stats_list.sort(key=lambda x: x["cumul_pct"], reverse=True)
 
         return stats_list
 
 
@@ -396,6 +396,8 @@ def draw_thread_status(self, line, width):
             total_samples = max(1, thread_data.sample_count)
             pct_gc = (thread_data.gc_frame_samples / total_samples) * 100
         else:
+            # Use total_samples for GC percentage since gc_frame_samples is tracked
+            # across ALL samples (via thread status), not just successful ones
             total_samples = max(1, self.collector.total_samples)
             pct_gc = (self.collector.gc_frame_samples / total_samples) * 100
 
@@ -529,10 +531,7 @@ def draw_top_functions(self, line, width, stats_list):
                 continue
 
             func_name = func_data["func"][2]
-            func_pct = (
-                func_data["direct_calls"]
-                / max(1, self.collector.total_samples)
-            ) * 100
+            func_pct = func_data["sample_pct"]
 
             # Medal emoji
             if col + 3 < width - 15:
@@ -765,19 +764,10 @@ def draw_stats_rows(self, line, height, width, stats_list, column_flags):
             cumulative_calls = stat["cumulative_calls"]
             total_time = stat["total_time"]
             cumulative_time = stat["cumulative_time"]
+            sample_pct = stat["sample_pct"]
+            cum_pct = stat["cumul_pct"]
             trends = stat.get("trends", {})
 
-            sample_pct = (
-                (direct_calls / self.collector.total_samples * 100)
-                if self.collector.total_samples > 0
-                else 0
-            )
-            cum_pct = (
-                (cumulative_calls / self.collector.total_samples * 100)
-                if self.collector.total_samples > 0
-                else 0
-            )
-
             # Check if this row is selected
             is_selected = show_opcodes and row_idx == selected_row
 
 
@@ -16,18 +16,23 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
             lambda: collections.defaultdict(int)
         )
         self.skip_idle = skip_idle
+        self._seen_locations = set()
 
     def _process_frames(self, frames):
         """Process a single thread's frame stack."""
         if not frames:
             return
 
+        self._seen_locations.clear()
+
         # Process each frame in the stack to track cumulative calls
         # frame.location is int, tuple (lineno, end_lineno, col_offset, end_col_offset), or None
         for frame in frames:
             lineno = extract_lineno(frame.location)
-            loc = (frame.filename, lineno, frame.funcname)
-            self.result[loc]["cumulative_calls"] += 1
+            location = (frame.filename, lineno, frame.funcname)
+            if location not in self._seen_locations:
+                self._seen_locations.add(location)
+                self.result[location]["cumulative_calls"] += 1
 
         # The top frame gets counted as an inline call (directly executing)
         top_lineno = extract_lineno(frames[0].location)