fixup! Merge upstream/main into gh-142374

pablogsal · pablogsal · commit 3be986622285 · 2025-12-12T00:07:34.000Z
diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py
@@ -376,11 +376,13 @@ def collect(self, stack_frames):
                     thread_data.gc_frame_samples += stats["gc_samples"]
 
         # Process frames using pre-selected iterator
+        frames_processed = False
         for frames, thread_id in self._get_frame_iterator(stack_frames):
             if not frames:
                 continue
 
             self.process_frames(frames, thread_id=thread_id)
+            frames_processed = True
 
             # Track thread IDs
             if thread_id is not None and thread_id not in self.thread_ids:
@@ -393,7 +395,11 @@ def collect(self, stack_frames):
         if has_gc_frame:
             self.gc_frame_samples += 1
 
-        self.successful_samples += 1
+        # Only count as successful if we actually processed frames
+        # This is important for modes like --mode exception where most samples
+        # may be filtered out at the C level
+        if frames_processed:
+            self.successful_samples += 1
         self.total_samples += 1
 
         # Handle input on every sample for instant responsiveness
@@ -664,9 +670,11 @@ def build_stats_list(self):
             total_time = direct_calls * self.sample_interval_sec
             cumulative_time = cumulative_calls * self.sample_interval_sec
 
-            # Calculate sample percentages
-            sample_pct = (direct_calls / self.total_samples * 100) if self.total_samples > 0 else 0
-            cumul_pct = (cumulative_calls / self.total_samples * 100) if self.total_samples > 0 else 0
+            # Calculate sample percentages using successful_samples as denominator
+            # This ensures percentages are relative to samples that actually had data,
+            # not all sampling attempts (important for filtered modes like --mode exception)
+            sample_pct = (direct_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
+            cumul_pct = (cumulative_calls / self.successful_samples * 100) if self.successful_samples > 0 else 0
 
             # Calculate trends for all columns using TrendTracker
             trends = {}
@@ -689,7 +697,9 @@ def build_stats_list(self):
                     "cumulative_calls": cumulative_calls,
                     "total_time": total_time,
                     "cumulative_time": cumulative_time,
-                    "trends": trends,  # Dictionary of trends for all columns
+                    "sample_pct": sample_pct,
+                    "cumul_pct": cumul_pct,
+                    "trends": trends,
                 }
             )
 
@@ -701,21 +711,9 @@ def build_stats_list(self):
         elif self.sort_by == "cumtime":
             stats_list.sort(key=lambda x: x["cumulative_time"], reverse=True)
         elif self.sort_by == "sample_pct":
-            stats_list.sort(
-                key=lambda x: (x["direct_calls"] / self.total_samples * 100)
-                if self.total_samples > 0
-                else 0,
-                reverse=True,
-            )
+            stats_list.sort(key=lambda x: x["sample_pct"], reverse=True)
         elif self.sort_by == "cumul_pct":
-            stats_list.sort(
-                key=lambda x: (
-                    x["cumulative_calls"] / self.total_samples * 100
-                )
-                if self.total_samples > 0
-                else 0,
-                reverse=True,
-            )
+            stats_list.sort(key=lambda x: x["cumul_pct"], reverse=True)
 
         return stats_list
 
diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py
@@ -396,6 +396,8 @@ def draw_thread_status(self, line, width):
             total_samples = max(1, thread_data.sample_count)
             pct_gc = (thread_data.gc_frame_samples / total_samples) * 100
         else:
+            # Use total_samples for GC percentage since gc_frame_samples is tracked
+            # across ALL samples (via thread status), not just successful ones
             total_samples = max(1, self.collector.total_samples)
             pct_gc = (self.collector.gc_frame_samples / total_samples) * 100
 
@@ -529,10 +531,7 @@ def draw_top_functions(self, line, width, stats_list):
                 continue
 
             func_name = func_data["func"][2]
-            func_pct = (
-                func_data["direct_calls"]
-                / max(1, self.collector.total_samples)
-            ) * 100
+            func_pct = func_data["sample_pct"]
 
             # Medal emoji
             if col + 3 < width - 15:
@@ -765,19 +764,10 @@ def draw_stats_rows(self, line, height, width, stats_list, column_flags):
             cumulative_calls = stat["cumulative_calls"]
             total_time = stat["total_time"]
             cumulative_time = stat["cumulative_time"]
+            sample_pct = stat["sample_pct"]
+            cum_pct = stat["cumul_pct"]
             trends = stat.get("trends", {})
 
-            sample_pct = (
-                (direct_calls / self.collector.total_samples * 100)
-                if self.collector.total_samples > 0
-                else 0
-            )
-            cum_pct = (
-                (cumulative_calls / self.collector.total_samples * 100)
-                if self.collector.total_samples > 0
-                else 0
-            )
-
             # Check if this row is selected
             is_selected = show_opcodes and row_idx == selected_row
 
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py
@@ -275,8 +275,11 @@ def test_collect_with_empty_frames(self):
 
         collector.collect(stack_frames)
 
-        # Empty frames still count as successful since collect() was called successfully
-        self.assertEqual(collector.successful_samples, 1)
+        # Empty frames do NOT count as successful - this is important for
+        # filtered modes like --mode exception where most samples may have
+        # no matching data. Only samples with actual frame data are counted.
+        self.assertEqual(collector.successful_samples, 0)
+        self.assertEqual(collector.total_samples, 1)
         self.assertEqual(collector.failed_samples, 0)
 
     def test_collect_skip_idle_threads(self):
@@ -321,6 +324,124 @@ def test_collect_multiple_threads(self):
         self.assertIn(123, collector.thread_ids)
         self.assertIn(124, collector.thread_ids)
 
+    def test_collect_filtered_mode_percentage_calculation(self):
+        """Test that percentages use successful_samples, not total_samples.
+
+        This is critical for filtered modes like --mode exception where most
+        samples may be filtered out at the C level. The percentages should
+        be relative to samples that actually had frame data, not all attempts.
+        """
+        collector = LiveStatsCollector(1000)
+
+        # Simulate 10 samples where only 2 had matching data (e.g., exception mode)
+        frames_with_data = [MockFrameInfo("test.py", 10, "exception_handler")]
+        thread_with_data = MockThreadInfo(123, frames_with_data)
+        interpreter_with_data = MockInterpreterInfo(0, [thread_with_data])
+
+        # Empty thread simulates filtered-out data
+        thread_empty = MockThreadInfo(456, [])
+        interpreter_empty = MockInterpreterInfo(0, [thread_empty])
+
+        # 2 samples with data
+        collector.collect([interpreter_with_data])
+        collector.collect([interpreter_with_data])
+
+        # 8 samples without data (filtered out)
+        for _ in range(8):
+            collector.collect([interpreter_empty])
+
+        # Verify counts
+        self.assertEqual(collector.total_samples, 10)
+        self.assertEqual(collector.successful_samples, 2)
+
+        # Build stats and check percentage
+        stats_list = collector.build_stats_list()
+        self.assertEqual(len(stats_list), 1)
+
+        # The function appeared in 2 out of 2 successful samples = 100%
+        # NOT 2 out of 10 total samples = 20%
+        location = ("test.py", 10, "exception_handler")
+        self.assertEqual(collector.result[location]["direct_calls"], 2)
+
+        # Pin the value build_stats_list() reports rather than only describing it:
+        # direct_calls / successful_samples * 100 = 2/2 * 100 = 100%
+        self.assertAlmostEqual(stats_list[0]["sample_pct"], 100.0)
+
+    def test_percentage_values_use_successful_samples(self):
+        """Test that percentages are calculated from successful_samples.
+
+        This verifies the fix where percentages use successful_samples (samples with
+        frame data) instead of total_samples (all sampling attempts). Critical for
+        filtered modes like --mode exception.
+        """
+        collector = LiveStatsCollector(1000)
+
+        # Simulate scenario: 100 total samples, only 20 had frame data
+        collector.total_samples = 100
+        collector.successful_samples = 20
+
+        # Function appeared in 10 out of 20 successful samples
+        collector.result[("test.py", 10, "handler")] = {
+            "direct_calls": 10,
+            "cumulative_calls": 15,
+            "total_rec_calls": 0,
+        }
+
+        stats_list = collector.build_stats_list()
+        self.assertEqual(len(stats_list), 1)
+
+        stat = stats_list[0]
+        # Check the values build_stats_list() actually reports; recomputing
+        # them here from the same inputs would pass even with a wrong
+        # denominator in the collector.
+
+        # Percentage should be 10/20 * 100 = 50%, NOT 10/100 * 100 = 10%
+        self.assertAlmostEqual(stat["sample_pct"], 50.0)
+        # Cumulative percentage should be 15/20 * 100 = 75%, NOT 15/100 * 100 = 15%
+        self.assertAlmostEqual(stat["cumul_pct"], 75.0)
+
+        # Verify sorting by percentage works correctly
+        collector.result[("test.py", 20, "other")] = {
+            "direct_calls": 5,  # 25% of successful samples
+            "cumulative_calls": 8,
+            "total_rec_calls": 0,
+        }
+        collector.sort_by = "sample_pct"
+        stats_list = collector.build_stats_list()
+        # handler (50%) should come before other (25%)
+        self.assertEqual(stats_list[0]["func"][2], "handler")
+        self.assertEqual(stats_list[1]["func"][2], "other")
+
+    def test_build_stats_list_zero_successful_samples(self):
+        """Test build_stats_list handles zero successful_samples without division by zero.
+
+        When all samples are filtered out (e.g., exception mode with no exceptions),
+        percentage calculations should return 0 without raising ZeroDivisionError.
+        """
+        collector = LiveStatsCollector(1000)
+
+        # Edge case: data exists but no successful samples
+        collector.result[("test.py", 10, "func")] = {
+            "direct_calls": 10,
+            "cumulative_calls": 10,
+            "total_rec_calls": 0,
+        }
+        collector.total_samples = 100
+        collector.successful_samples = 0  # All samples filtered out
+
+        # Should not raise ZeroDivisionError, and both percentages fall back to 0
+        stats_list = collector.build_stats_list()
+        self.assertEqual(len(stats_list), 1)
+        self.assertEqual(stats_list[0]["sample_pct"], 0)
+        self.assertEqual(stats_list[0]["cumul_pct"], 0)
+
+        # Percentage-based sorting must also survive zero successful_samples
+        for sort_key in ("sample_pct", "cumul_pct"):
+            with self.subTest(sort_by=sort_key):
+                collector.sort_by = sort_key
+                stats_list = collector.build_stats_list()
+                self.assertEqual(len(stats_list), 1)
+
 
 class TestLiveStatsCollectorStatisticsBuilding(unittest.TestCase):
     """Tests for statistics building and sorting."""
@@ -345,6 +466,8 @@ def setUp(self):
             "total_rec_calls": 0,
         }
         self.collector.total_samples = 300
+        # successful_samples is used for percentage calculations
+        self.collector.successful_samples = 300
 
     def test_build_stats_list(self):
         """Test that stats list is built correctly."""
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py
@@ -148,6 +148,7 @@ def test_efficiency_bar_visualization(self):
     def test_stats_display_with_different_sort_modes(self):
         """Test that stats are displayed correctly with different sort modes."""
         self.collector.total_samples = 100
+        self.collector.successful_samples = 100  # For percentage calculations
         self.collector.result[("a.py", 1, "func_a")] = {
             "direct_calls": 10,
             "cumulative_calls": 20,