From 28b512e5d8969745147243b119c1197fc8b3ed03 Mon Sep 17 00:00:00 2001 From: Marta Gomez Macias Date: Tue, 23 Dec 2025 15:22:53 +0000 Subject: [PATCH 1/5] fix(tachyon): Exit gracefully and display profiled script errors --- Lib/profiling/sampling/_sync_coordinator.py | 49 +++-- Lib/profiling/sampling/cli.py | 178 +++--------------- .../sampling/live_collector/collector.py | 3 + Lib/profiling/sampling/sample.py | 22 ++- .../test_sampling_profiler/test_cli.py | 74 ++++++++ 5 files changed, 146 insertions(+), 180 deletions(-) diff --git a/Lib/profiling/sampling/_sync_coordinator.py b/Lib/profiling/sampling/_sync_coordinator.py index 1a4af42588a3f5..8c550f3625980c 100644 --- a/Lib/profiling/sampling/_sync_coordinator.py +++ b/Lib/profiling/sampling/_sync_coordinator.py @@ -135,7 +135,7 @@ def _execute_module(module_name: str, module_args: List[str]) -> None: module_args: Arguments to pass to the module Raises: - TargetError: If module execution fails + TargetError: If module cannot be found """ # Replace sys.argv to match how Python normally runs modules # When running 'python -m module args', sys.argv is ["__main__.py", "args"] @@ -145,11 +145,8 @@ def _execute_module(module_name: str, module_args: List[str]) -> None: runpy.run_module(module_name, run_name="__main__", alter_sys=True) except ImportError as e: raise TargetError(f"Module '{module_name}' not found: {e}") from e - except SystemExit: - # SystemExit is normal for modules - pass - except Exception as e: - raise TargetError(f"Error executing module '{module_name}': {e}") from e + # Let other exceptions (including SystemExit) propagate naturally + # so Python prints the full traceback to stderr def _execute_script(script_path: str, script_args: List[str], cwd: str) -> None: @@ -183,22 +180,20 @@ def _execute_script(script_path: str, script_args: List[str], cwd: str) -> None: except PermissionError as e: raise TargetError(f"Permission denied reading script: {script_path}") from e - try: - main_module = types.ModuleType("__main__") - main_module.__file__ = script_path - main_module.__builtins__ = __builtins__ - # gh-140729: Create a __mp_main__ module to allow pickling - sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + main_module = types.ModuleType("__main__") + main_module.__file__ = script_path + main_module.__builtins__ = __builtins__ + # gh-140729: Create a __mp_main__ module to allow pickling + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + try: code = compile(source_code, script_path, 'exec', module='__main__') - exec(code, main_module.__dict__) except SyntaxError as e: raise TargetError(f"Syntax error in script {script_path}: {e}") from e - except SystemExit: - # SystemExit is normal for scripts - pass - except Exception as e: - raise TargetError(f"Error executing script '{script_path}': {e}") from e + + # Execute the script - let exceptions propagate naturally so Python + # prints the full traceback to stderr + exec(code, main_module.__dict__) def main() -> NoReturn: @@ -209,6 +204,8 @@ def main() -> NoReturn: with the sample profiler by signaling when the process is ready to be profiled. """ + # Phase 1: Parse arguments and set up environment + # Errors here are coordinator errors, not script errors try: # Parse and validate arguments sync_port, cwd, target_args = _validate_arguments(sys.argv) @@ -237,21 +234,19 @@ def main() -> NoReturn: # Signal readiness to profiler _signal_readiness(sync_port) - # Execute the target - if is_module: - _execute_module(module_name, module_args) - else: - _execute_script(script_path, script_args, cwd) - except CoordinatorError as e: print(f"Profiler coordinator error: {e}", file=sys.stderr) sys.exit(1) except KeyboardInterrupt: print("Interrupted", file=sys.stderr) sys.exit(1) - except Exception as e: - print(f"Unexpected error in profiler coordinator: {e}", file=sys.stderr) - sys.exit(1) + + # Phase 2: Execute the target script/module + # Let exceptions propagate naturally so Python prints full tracebacks + if is_module: + _execute_module(module_name, module_args) + else: + _execute_script(script_path, script_args, cwd) # Normal exit sys.exit(0) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index ccd6e954d79698..289a8b42f82ff7 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -2,7 +2,6 @@ import argparse import importlib.util -import locale import os import selectors import socket @@ -17,8 +16,6 @@ from .stack_collector import CollapsedStackCollector, FlamegraphCollector from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector -from .binary_collector import BinaryCollector -from .binary_reader import BinaryReader from .constants import ( PROFILING_MODE_ALL, PROFILING_MODE_WALL, @@ -78,7 +75,6 @@ class CustomFormatter( "flamegraph": "html", "gecko": "json", "heatmap": "html", - "binary": "bin", } COLLECTOR_MAP = { @@ -87,7 +83,6 @@ class CustomFormatter( "flamegraph": FlamegraphCollector, "gecko": GeckoCollector, "heatmap": HeatmapCollector, - "binary": BinaryCollector, } def _setup_child_monitor(args, parent_pid): @@ -185,7 +180,7 @@ def _parse_mode(mode_string): def _check_process_died(process): """Check if process died and raise an error with stderr if available.""" if process.poll() is None: - return + return # Process still running # Process died - try to get stderr for error message stderr_msg = "" @@ -270,10 +265,6 @@ def _run_with_sync(original_cmd, suppress_output=False): try: _wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT) - # Close stderr pipe if we were capturing it - if process.stderr: - process.stderr.close() - except socket.timeout: # If we timeout, kill the process and raise an error if process.poll() is None: @@ -377,7 +368,7 @@ def _add_mode_options(parser): ) -def _add_format_options(parser, include_compression=True, include_binary=True): +def _add_format_options(parser): """Add output format options to a parser.""" output_group = parser.add_argument_group("Output options") format_group = output_group.add_mutually_exclusive_group() @@ -416,24 +407,8 @@ def _add_format_options(parser, include_compression=True, include_binary=True): dest="format", help="Generate interactive HTML heatmap visualization with line-level sample counts", ) - if include_binary: - format_group.add_argument( - "--binary", - action="store_const", - const="binary", - dest="format", - help="Generate high-performance binary format (use 'replay' command to convert)", - ) parser.set_defaults(format="pstats") - if include_compression: - output_group.add_argument( - "--compression", - choices=["auto", "zstd", "none"], - default="auto", - help="Compression for binary format: auto (use zstd if available), zstd, none", - ) - output_group.add_argument( "-o", "--output", @@ -488,18 +463,15 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) -def _create_collector(format_type, interval, skip_idle, opcodes=False, - output_file=None, compression='auto'): +def _create_collector(format_type, interval, skip_idle, opcodes=False): """Create the appropriate collector based on format type. Args: - format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary') + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap') interval: Sampling interval in microseconds skip_idle: Whether to skip idle samples opcodes: Whether to collect opcode information (only used by gecko format for creating interval markers in Firefox Profiler) - output_file: Output file path (required for binary format) - compression: Compression type for binary format ('auto', 'zstd', 'none') Returns: A collector instance of the appropriate type @@ -508,13 +480,6 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False, if collector_class is None: raise ValueError(f"Unknown format: {format_type}") - # Binary format requires output file and compression - if format_type == "binary": - if output_file is None: - raise ValueError("Binary format requires an output file") - return collector_class(output_file, interval, skip_idle=skip_idle, - compression=compression) - # Gecko format never skips idle (it needs both GIL and CPU data) # and is the only format that uses opcodes for interval markers if format_type == "gecko": @@ -550,12 +515,7 @@ def _handle_output(collector, args, pid, mode): pid: Process ID (for generating filenames) mode: Profiling mode used """ - if args.format == "binary": - # Binary format already wrote to file incrementally, just finalize - collector.export(None) - filename = collector.filename - print(f"Binary profile written to {filename} ({collector.total_samples} samples)") - elif args.format == "pstats": + if args.format == "pstats": if args.outfile: # If outfile is a directory, generate filename inside it if os.path.isdir(args.outfile): @@ -588,10 +548,6 @@ def _validate_args(args, parser): args: Parsed command-line arguments parser: ArgumentParser instance for error reporting """ - # Replay command has no special validation needed - if getattr(args, 'command', None) == "replay": - return - # Warn about blocking mode with aggressive sampling intervals if args.blocking and args.interval < 100: print( @@ -613,7 +569,7 @@ def _validate_args(args, parser): parser.error("--subprocesses is incompatible with --live mode.") # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads - if getattr(args, 'async_aware', False): + if args.async_aware: issues = [] if args.native: issues.append("--native") @@ -630,7 +586,7 @@ def _validate_args(args, parser): ) # --async-mode requires --async-aware - if hasattr(args, 'async_mode') and args.async_mode != "running" and not getattr(args, 'async_aware', False): + if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware: parser.error("--async-mode requires --async-aware to be enabled.") # Live mode is incompatible with format options @@ -658,7 +614,7 @@ def _validate_args(args, parser): return # Validate gecko mode doesn't use non-wall mode - if args.format == "gecko" and getattr(args, 'mode', 'wall') != "wall": + if args.format == "gecko" and args.mode != "wall": parser.error( "--mode option is incompatible with --gecko. " "Gecko format automatically includes both GIL-holding and CPU status analysis." @@ -666,7 +622,7 @@ def _validate_args(args, parser): # Validate --opcodes is only used with compatible formats opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap") - if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats: + if args.opcodes and args.format not in opcodes_compatible_formats: parser.error( f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." ) @@ -690,16 +646,6 @@ def _validate_args(args, parser): def main(): """Main entry point for the CLI.""" - # Set locale for number formatting, restore on exit - old_locale = locale.setlocale(locale.LC_ALL, None) - locale.setlocale(locale.LC_ALL, "") - try: - _main() - finally: - locale.setlocale(locale.LC_ALL, old_locale) - - -def _main(): # Create the main parser parser = argparse.ArgumentParser( description=_HELP_DESCRIPTION, @@ -788,30 +734,6 @@ def _main(): _add_format_options(attach_parser) _add_pstats_options(attach_parser) - # === REPLAY COMMAND === - replay_parser = subparsers.add_parser( - "replay", - help="Replay a binary profile and convert to another format", - formatter_class=CustomFormatter, - description="""Replay a binary profile file and convert to another format - -Examples: - # Convert binary to flamegraph - `python -m profiling.sampling replay --flamegraph -o output.html profile.bin` - - # Convert binary to pstats and print to stdout - `python -m profiling.sampling replay profile.bin` - - # Convert binary to gecko format - `python -m profiling.sampling replay --gecko -o profile.json profile.bin`""", - ) - replay_parser.add_argument( - "input_file", - help="Binary profile file to replay", - ) - _add_format_options(replay_parser, include_compression=False, include_binary=False) - _add_pstats_options(replay_parser) - # Parse arguments args = parser.parse_args() @@ -822,7 +744,6 @@ def _main(): command_handlers = { "run": _handle_run, "attach": _handle_attach, - "replay": _handle_replay, } # Execute the appropriate command @@ -854,16 +775,8 @@ def _handle_attach(args): mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False ) - output_file = None - if args.format == "binary": - output_file = args.outfile or _generate_output_filename(args.format, args.pid) - # Create the appropriate collector - collector = _create_collector( - args.format, args.interval, skip_idle, args.opcodes, - output_file=output_file, - compression=getattr(args, 'compression', 'auto') - ) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) with _get_child_monitor_context(args, args.pid): collector = sample( @@ -932,16 +845,8 @@ def _handle_run(args): mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False ) - output_file = None - if args.format == "binary": - output_file = args.outfile or _generate_output_filename(args.format, process.pid) - # Create the appropriate collector - collector = _create_collector( - args.format, args.interval, skip_idle, args.opcodes, - output_file=output_file, - compression=getattr(args, 'compression', 'auto') - ) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) with _get_child_monitor_context(args, process.pid): try: @@ -1053,57 +958,26 @@ def _handle_live_run(args): blocking=args.blocking, ) finally: - # Clean up the subprocess - if process.poll() is None: + # Clean up the subprocess and get any error output + returncode = process.poll() + if returncode is None: + # Process still running - terminate it process.terminate() try: process.wait(timeout=_PROCESS_KILL_TIMEOUT) except subprocess.TimeoutExpired: process.kill() - process.wait() - - -def _handle_replay(args): - """Handle the 'replay' command - convert binary profile to another format.""" - import os - - if not os.path.exists(args.input_file): - sys.exit(f"Error: Input file not found: {args.input_file}") - - with BinaryReader(args.input_file) as reader: - info = reader.get_info() - interval = info['sample_interval_us'] - - print(f"Replaying {info['sample_count']} samples from {args.input_file}") - print(f" Sample interval: {interval} us") - print(f" Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}") - - collector = _create_collector(args.format, interval, skip_idle=False) - - def progress_callback(current, total): - if total > 0: - pct = current / total - bar_width = 40 - filled = int(bar_width * pct) - bar = '█' * filled + '░' * (bar_width - filled) - print(f"\r [{bar}] {pct*100:5.1f}% ({current:,}/{total:,})", end="", flush=True) - - count = reader.replay_samples(collector, progress_callback) - print() - - if args.format == "pstats": - if args.outfile: - collector.export(args.outfile) - else: - sort_choice = args.sort if args.sort is not None else "nsamples" - limit = args.limit if args.limit is not None else 15 - sort_mode = _sort_to_mode(sort_choice) - collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL) - else: - filename = args.outfile or _generate_output_filename(args.format, os.getpid()) - collector.export(filename) - - print(f"Replayed {count} samples") + # Ensure process is fully terminated + process.wait() + # Read any stderr output (tracebacks, errors, etc.) + if process.stderr: + try: + stderr = process.stderr.read() + if stderr: + print(stderr.decode(), file=sys.stderr) + except (OSError, ValueError): + # Ignore errors if pipe is already closed + pass if __name__ == "__main__": diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py index dcb9fcabe32779..c196d6341ce49e 100644 --- a/Lib/profiling/sampling/live_collector/collector.py +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -216,6 +216,9 @@ def __init__( def elapsed_time(self): """Get the elapsed time, frozen when finished.""" if self.finished and self.finish_timestamp is not None: + # Handle case where process exited before any samples were collected + if self.start_time is None: + return 0 return self.finish_timestamp - self.start_time return time.perf_counter() - self.start_time if self.start_time else 0 diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 2fe022c85b0b31..93397f70059538 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -42,7 +42,9 @@ def _pause_threads(unwinder, blocking): LiveStatsCollector = None _FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None - +# Minimum number of samples required before showing the TUI +# If fewer samples are collected, we skip the TUI and just print a message +MIN_SAMPLES_FOR_TUI = 200 class SampleProfiler: def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, opcodes=False, skip_non_matching_threads=True, collect_stats=False, blocking=False): @@ -458,6 +460,11 @@ def sample_live( """ import curses + # Check if process is alive before doing any heavy initialization + if not _is_process_running(pid): + print(f"No samples collected - process {pid} exited before profiling could begin.", file=sys.stderr) + return collector + # Get sample interval from collector sample_interval_usec = collector.sample_interval_usec @@ -485,6 +492,12 @@ def curses_wrapper_func(stdscr): collector.init_curses(stdscr) try: profiler.sample(collector, duration_sec, async_aware=async_aware) + # If too few samples were collected, exit cleanly without showing TUI + if collector.successful_samples < MIN_SAMPLES_FOR_TUI: + # Clear screen before exiting to avoid visual artifacts + stdscr.clear() + stdscr.refresh() + return # Mark as finished and keep the TUI running until user presses 'q' collector.mark_finished() # Keep processing input until user quits @@ -499,4 +512,11 @@ def curses_wrapper_func(stdscr): except KeyboardInterrupt: pass + # If too few samples were collected, print a message + if collector.successful_samples < MIN_SAMPLES_FOR_TUI: + if collector.successful_samples == 0: + print(f"No samples collected - process {pid} exited before profiling could begin.", file=sys.stderr) + else: + print(f"Only {collector.successful_samples} sample(s) collected (minimum {MIN_SAMPLES_FOR_TUI} required for TUI) - process {pid} exited too quickly.", file=sys.stderr) + return collector diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 9b2b16d6e1965b..036887b5e43056 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -1,10 +1,12 @@ """Tests for sampling profiler CLI argument parsing and functionality.""" +import functools import io import subprocess import sys import unittest from unittest import mock +import tempfile try: import _remote_debugging # noqa: F401 @@ -17,6 +19,7 @@ from profiling.sampling.cli import main from profiling.sampling.errors import SamplingScriptNotFoundError, SamplingModuleNotFoundError, SamplingUnknownProcessError +from profiling.sampling.live_collector import MockDisplay class TestSampleProfilerCLI(unittest.TestCase): @@ -722,3 +725,74 @@ def test_cli_attach_nonexistent_pid(self): main() self.assertIn(fake_pid, str(cm.exception)) + + +class TestLiveModeErrors(unittest.TestCase): + """Tests running error commands in the live mode fails gracefully.""" + + def mock_curses_wrapper(self, func): + func(mock.MagicMock()) + + def mock_init_curses_side_effect(self, n_times, mock_self, stdscr): + mock_self.display = MockDisplay() + # Allow the loop to run for a bit (approx 0.5s) before quitting + # This ensures we don't exit too early while the subprocess is + # still failing + for _ in range(n_times): + mock_self.display.simulate_input(-1) + if n_times >= 500: + mock_self.display.simulate_input(ord('q')) + + @unittest.skipIf(is_emscripten, "subprocess not available") + def test_run_failed_module_live(self): + """Test that running a existing module that fails exists with clean error.""" + + args = [ + "profiling.sampling.cli", "run", "--live", "-m", "test", + "test_asdasd" + ] + + with ( + mock.patch( + 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', + autospec=True, + side_effect=functools.partial(self.mock_init_curses_side_effect, 750) + ), + mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), + mock.patch("sys.argv", args), + mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr + ): + main() + self.assertStartsWith( + fake_stderr.getvalue(), + '\x1b[31mtest test_asdasd crashed -- Traceback (most recent call last):' + ) + + @unittest.skipIf(is_emscripten, "subprocess not available") + def test_run_failed_script_live(self): + """Test that running a failing script exits with clean error.""" + script = tempfile.NamedTemporaryFile(suffix=".py") + script.write(b'1/0\n') + script.seek(0) + + args = ["profiling.sampling.cli", "run", "--live", script.name] + + with ( + mock.patch( + 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', + autospec=True, + side_effect=functools.partial(self.mock_init_curses_side_effect, 200) + ), + mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), + mock.patch("sys.argv", args), + mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr + ): + main() + stderr = fake_stderr.getvalue() + self.assertIn( + 'sample(s) collected (minimum 200 required for TUI)', stderr + ) + self.assertEndsWith( + stderr, + 'ZeroDivisionError\x1b[0m: \x1b[35mdivision by zero\x1b[0m\n\n' + ) \ No newline at end of file From db38449bd9879bf73ea8ada5e07b899befe61bc9 Mon Sep 17 00:00:00 2001 From: Marta Gomez Macias Date: Tue, 23 Dec 2025 15:40:14 +0000 Subject: [PATCH 2/5] fix conflicts --- Lib/profiling/sampling/cli.py | 157 +++++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 12 deletions(-) diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 289a8b42f82ff7..4d2d5b37453724 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -2,6 +2,7 @@ import argparse import importlib.util +import locale import os import selectors import socket @@ -16,6 +17,8 @@ from .stack_collector import CollapsedStackCollector, FlamegraphCollector from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector +from .binary_collector import BinaryCollector +from .binary_reader import BinaryReader from .constants import ( PROFILING_MODE_ALL, PROFILING_MODE_WALL, @@ -75,6 +78,7 @@ class CustomFormatter( "flamegraph": "html", "gecko": "json", "heatmap": "html", + "binary": "bin", } COLLECTOR_MAP = { @@ -83,6 +87,7 @@ class CustomFormatter( "flamegraph": FlamegraphCollector, "gecko": GeckoCollector, "heatmap": HeatmapCollector, + "binary": BinaryCollector, } def _setup_child_monitor(args, parent_pid): @@ -180,7 +185,7 @@ def _parse_mode(mode_string): def _check_process_died(process): """Check if process died and raise an error with stderr if available.""" if process.poll() is None: - return # Process still running + return # Process died - try to get stderr for error message stderr_msg = "" @@ -264,7 +269,6 @@ def _run_with_sync(original_cmd, suppress_output=False): try: _wait_for_ready_signal(sync_sock, process, _SYNC_TIMEOUT) - except socket.timeout: # If we timeout, kill the process and raise an error if process.poll() is None: @@ -368,7 +372,7 @@ def _add_mode_options(parser): ) -def _add_format_options(parser): +def _add_format_options(parser, include_compression=True, include_binary=True): """Add output format options to a parser.""" output_group = parser.add_argument_group("Output options") format_group = output_group.add_mutually_exclusive_group() @@ -407,8 +411,24 @@ def _add_format_options(parser): dest="format", help="Generate interactive HTML heatmap visualization with line-level sample counts", ) + if include_binary: + format_group.add_argument( + "--binary", + action="store_const", + const="binary", + dest="format", + help="Generate high-performance binary format (use 'replay' command to convert)", + ) parser.set_defaults(format="pstats") + if include_compression: + output_group.add_argument( + "--compression", + choices=["auto", "zstd", "none"], + default="auto", + help="Compression for binary format: auto (use zstd if available), zstd, none", + ) + output_group.add_argument( "-o", "--output", @@ -463,15 +483,18 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) -def _create_collector(format_type, interval, skip_idle, opcodes=False): +def _create_collector(format_type, interval, skip_idle, opcodes=False, + output_file=None, compression='auto'): """Create the appropriate collector based on format type. Args: - format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap') + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary') interval: Sampling interval in microseconds skip_idle: Whether to skip idle samples opcodes: Whether to collect opcode information (only used by gecko format for creating interval markers in Firefox Profiler) + output_file: Output file path (required for binary format) + compression: Compression type for binary format ('auto', 'zstd', 'none') Returns: A collector instance of the appropriate type @@ -480,6 +503,13 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False): if collector_class is None: raise ValueError(f"Unknown format: {format_type}") + # Binary format requires output file and compression + if format_type == "binary": + if output_file is None: + raise ValueError("Binary format requires an output file") + return collector_class(output_file, interval, skip_idle=skip_idle, + compression=compression) + # Gecko format never skips idle (it needs both GIL and CPU data) # and is the only format that uses opcodes for interval markers if format_type == "gecko": @@ -515,7 +545,12 @@ def _handle_output(collector, args, pid, mode): pid: Process ID (for generating filenames) mode: Profiling mode used """ - if args.format == "pstats": + if args.format == "binary": + # Binary format already wrote to file incrementally, just finalize + collector.export(None) + filename = collector.filename + print(f"Binary profile written to {filename} ({collector.total_samples} samples)") + elif args.format == "pstats": if args.outfile: # If outfile is a directory, generate filename inside it if os.path.isdir(args.outfile): @@ -548,6 +583,10 @@ def _validate_args(args, parser): args: Parsed command-line arguments parser: ArgumentParser instance for error reporting """ + # Replay command has no special validation needed + if getattr(args, 'command', None) == "replay": + return + # Warn about blocking mode with aggressive sampling intervals if args.blocking and args.interval < 100: print( @@ -569,7 +608,7 @@ def _validate_args(args, parser): parser.error("--subprocesses is incompatible with --live mode.") # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads - if args.async_aware: + if getattr(args, 'async_aware', False): issues = [] if args.native: issues.append("--native") @@ -586,7 +625,7 @@ def _validate_args(args, parser): ) # --async-mode requires --async-aware - if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware: + if hasattr(args, 'async_mode') and args.async_mode != "running" and not getattr(args, 'async_aware', False): parser.error("--async-mode requires --async-aware to be enabled.") # Live mode is incompatible with format options @@ -614,7 +653,7 @@ def _validate_args(args, parser): return # Validate gecko mode doesn't use non-wall mode - if args.format == "gecko" and args.mode != "wall": + if args.format == "gecko" and getattr(args, 'mode', 'wall') != "wall": parser.error( "--mode option is incompatible with --gecko. " "Gecko format automatically includes both GIL-holding and CPU status analysis." @@ -622,7 +661,7 @@ def _validate_args(args, parser): # Validate --opcodes is only used with compatible formats opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap") - if args.opcodes and args.format not in opcodes_compatible_formats: + if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats: parser.error( f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." ) @@ -646,6 +685,16 @@ def _validate_args(args, parser): def main(): """Main entry point for the CLI.""" + # Set locale for number formatting, restore on exit + old_locale = locale.setlocale(locale.LC_ALL, None) + locale.setlocale(locale.LC_ALL, "") + try: + _main() + finally: + locale.setlocale(locale.LC_ALL, old_locale) + + +def _main(): # Create the main parser parser = argparse.ArgumentParser( description=_HELP_DESCRIPTION, @@ -734,6 +783,30 @@ def main(): _add_format_options(attach_parser) _add_pstats_options(attach_parser) + # === REPLAY COMMAND === + replay_parser = subparsers.add_parser( + "replay", + help="Replay a binary profile and convert to another format", + formatter_class=CustomFormatter, + description="""Replay a binary profile file and convert to another format + +Examples: + # Convert binary to flamegraph + `python -m profiling.sampling replay --flamegraph -o output.html profile.bin` + + # Convert binary to pstats and print to stdout + `python -m profiling.sampling replay profile.bin` + + # Convert binary to gecko format + `python -m profiling.sampling replay --gecko -o profile.json profile.bin`""", + ) + replay_parser.add_argument( + "input_file", + help="Binary profile file to replay", + ) + _add_format_options(replay_parser, include_compression=False, include_binary=False) + _add_pstats_options(replay_parser) + # Parse arguments args = parser.parse_args() @@ -744,6 +817,7 @@ def main(): command_handlers = { "run": _handle_run, "attach": _handle_attach, + "replay": _handle_replay, } # Execute the appropriate command @@ -775,8 +849,16 @@ def _handle_attach(args): mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False ) + output_file = None + if args.format == "binary": + output_file = args.outfile or _generate_output_filename(args.format, args.pid) + # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) + collector = _create_collector( + args.format, args.interval, skip_idle, args.opcodes, + output_file=output_file, + compression=getattr(args, 'compression', 'auto') + ) with _get_child_monitor_context(args, args.pid): collector = sample( @@ -845,8 +927,16 @@ def _handle_run(args): mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False ) + output_file = None + if args.format == "binary": + output_file = args.outfile or _generate_output_filename(args.format, process.pid) + # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) + collector = _create_collector( + args.format, args.interval, skip_idle, args.opcodes, + output_file=output_file, + compression=getattr(args, 'compression', 'auto') + ) with _get_child_monitor_context(args, process.pid): try: @@ -980,5 +1070,48 @@ def _handle_live_run(args): pass +def _handle_replay(args): + """Handle the 'replay' command - convert binary profile to another format.""" + import os + + if not os.path.exists(args.input_file): + sys.exit(f"Error: Input file not found: {args.input_file}") + + with BinaryReader(args.input_file) as reader: + info = reader.get_info() + interval = info['sample_interval_us'] + + print(f"Replaying {info['sample_count']} samples from {args.input_file}") + print(f" Sample interval: {interval} us") + print(f" Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}") + + collector = _create_collector(args.format, interval, skip_idle=False) + + def progress_callback(current, total): + if total > 0: + pct = current / total + bar_width = 40 + filled = int(bar_width * pct) + bar = '█' * filled + '░' * (bar_width - filled) + print(f"\r [{bar}] {pct*100:5.1f}% ({current:,}/{total:,})", end="", flush=True) + + count = reader.replay_samples(collector, progress_callback) + print() + + if args.format == "pstats": + if args.outfile: + collector.export(args.outfile) + else: + sort_choice = args.sort if args.sort is not None else "nsamples" + limit = args.limit if args.limit is not None else 15 + sort_mode = _sort_to_mode(sort_choice) + collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL) + else: + filename = args.outfile or _generate_output_filename(args.format, os.getpid()) + collector.export(filename) + + print(f"Replayed {count} samples") + + if __name__ == "__main__": main() From 0af647c39b4f889f5587155472703cd668e1eca7 Mon Sep 17 00:00:00 2001 From: Marta Gomez Macias Date: Tue, 23 Dec 2025 15:46:38 +0000 Subject: [PATCH 3/5] . --- Lib/test/test_profiling/test_sampling_profiler/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 036887b5e43056..98894bcbcf308f 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -756,7 +756,7 @@ def test_run_failed_module_live(self): mock.patch( 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', autospec=True, - side_effect=functools.partial(self.mock_init_curses_side_effect, 750) + side_effect=functools.partial(self.mock_init_curses_side_effect, 1000) ), mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), mock.patch("sys.argv", args), @@ -795,4 +795,4 @@ def test_run_failed_script_live(self): self.assertEndsWith( stderr, 'ZeroDivisionError\x1b[0m: \x1b[35mdivision by zero\x1b[0m\n\n' - ) \ No newline at end of file + ) From d7d5e870511081c8fb8817ba3dc19c56c7713e1a Mon Sep 17 00:00:00 2001 From: Marta Gomez Macias Date: Tue, 23 Dec 2025 23:48:31 +0000 Subject: [PATCH 4/5] only run the tests if curses is enabled --- .../test_sampling_profiler/test_cli.py | 75 ------------------ .../test_live_collector_ui.py | 77 ++++++++++++++++++- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py index 98894bcbcf308f..bd76d1695a5a4a 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -1,12 +1,10 @@ """Tests for sampling profiler CLI argument parsing and functionality.""" -import functools import io import subprocess import sys import unittest from unittest import mock -import tempfile try: import _remote_debugging # noqa: F401 @@ -19,8 +17,6 @@ from profiling.sampling.cli import main from profiling.sampling.errors import SamplingScriptNotFoundError, SamplingModuleNotFoundError, SamplingUnknownProcessError -from profiling.sampling.live_collector import MockDisplay - class TestSampleProfilerCLI(unittest.TestCase): def _setup_sync_mocks(self, mock_socket, mock_popen): @@ -725,74 +721,3 @@ def test_cli_attach_nonexistent_pid(self): main() self.assertIn(fake_pid, str(cm.exception)) - - -class TestLiveModeErrors(unittest.TestCase): - """Tests running error commands in the live mode fails gracefully.""" - - def mock_curses_wrapper(self, func): - func(mock.MagicMock()) - - def mock_init_curses_side_effect(self, n_times, mock_self, stdscr): - mock_self.display = MockDisplay() - # Allow the loop to run for a bit (approx 0.5s) before quitting - # This ensures we don't exit too early while the subprocess is - # still failing - for _ in range(n_times): - mock_self.display.simulate_input(-1) - if n_times >= 500: - mock_self.display.simulate_input(ord('q')) - - @unittest.skipIf(is_emscripten, "subprocess not available") - def test_run_failed_module_live(self): - """Test that running a existing module that fails exists with clean error.""" - - args = [ - "profiling.sampling.cli", "run", "--live", "-m", "test", - "test_asdasd" - ] - - with ( - mock.patch( - 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', - autospec=True, - side_effect=functools.partial(self.mock_init_curses_side_effect, 1000) - ), - mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), - mock.patch("sys.argv", args), - mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr - ): - main() - self.assertStartsWith( - fake_stderr.getvalue(), - '\x1b[31mtest test_asdasd crashed -- Traceback (most recent call last):' - ) - - @unittest.skipIf(is_emscripten, "subprocess not available") - def test_run_failed_script_live(self): - """Test that running a failing script exits with clean error.""" - script = tempfile.NamedTemporaryFile(suffix=".py") - script.write(b'1/0\n') - script.seek(0) - - args = ["profiling.sampling.cli", "run", "--live", script.name] - - with ( - mock.patch( - 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', - autospec=True, - side_effect=functools.partial(self.mock_init_curses_side_effect, 200) - ), - mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), - mock.patch("sys.argv", args), - mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr - ): - main() - stderr = fake_stderr.getvalue() - self.assertIn( - 'sample(s) collected (minimum 200 required for TUI)', stderr - ) - self.assertEndsWith( - stderr, - 'ZeroDivisionError\x1b[0m: \x1b[35mdivision by zero\x1b[0m\n\n' - ) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py index 2ed9d82a4a4aa2..e619db73d76ea1 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py @@ -4,11 +4,14 @@ edge cases, update display, and display helpers. """ +import functools +import io import sys +import tempfile import time import unittest from unittest import mock -from test.support import requires +from test.support import requires, is_emscripten from test.support.import_helper import import_module # Only run these tests if curses is available @@ -16,6 +19,7 @@ curses = import_module("curses") from profiling.sampling.live_collector import LiveStatsCollector, MockDisplay +from profiling.sampling.cli import main from ._live_collector_helpers import ( MockThreadInfo, MockInterpreterInfo, @@ -816,5 +820,76 @@ def test_get_all_lines_full_display(self): self.assertTrue(any("PID" in line for line in lines)) +class TestLiveModeErrors(unittest.TestCase): + """Tests running error commands in the live mode fails gracefully.""" + + def mock_curses_wrapper(self, func): + func(mock.MagicMock()) + + def mock_init_curses_side_effect(self, n_times, mock_self, stdscr): + mock_self.display = MockDisplay() + # Allow the loop to run for a bit (approx 0.5s) before quitting + # This ensures we don't exit too early while the subprocess is + # still failing + for _ in range(n_times): + mock_self.display.simulate_input(-1) + if n_times >= 500: + mock_self.display.simulate_input(ord('q')) + + @unittest.skipIf(is_emscripten, "subprocess not available") + def test_run_failed_module_live(self): + """Test that running a existing module that fails exists with clean error.""" + + args = [ + "profiling.sampling.cli", "run", "--live", "-m", "test", + "test_asdasd" + ] + + with ( + mock.patch( + 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', + autospec=True, + side_effect=functools.partial(self.mock_init_curses_side_effect, 1000) + ), + mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), + mock.patch("sys.argv", args), + mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr + ): + main() + self.assertStartsWith( + fake_stderr.getvalue(), + '\x1b[31mtest test_asdasd crashed -- Traceback (most recent call last):' + ) + + @unittest.skipIf(is_emscripten, "subprocess not available") + def test_run_failed_script_live(self): + """Test that running a failing script exits with clean error.""" + script = tempfile.NamedTemporaryFile(suffix=".py") + script.write(b'1/0\n') + script.seek(0) + + args = ["profiling.sampling.cli", "run", "--live", script.name] + + with ( + mock.patch( + 'profiling.sampling.live_collector.collector.LiveStatsCollector.init_curses', + autospec=True, + side_effect=functools.partial(self.mock_init_curses_side_effect, 200) + ), + mock.patch('curses.wrapper', side_effect=self.mock_curses_wrapper), + mock.patch("sys.argv", args), + mock.patch('sys.stderr', new=io.StringIO()) as fake_stderr + ): + main() + stderr = fake_stderr.getvalue() + self.assertIn( + 'sample(s) collected (minimum 200 required for TUI)', stderr + ) + self.assertEndsWith( + stderr, + 'ZeroDivisionError\x1b[0m: \x1b[35mdivision by zero\x1b[0m\n\n' + ) + + if __name__ == "__main__": unittest.main() From 79e3121db4ecc520356b97d74d29b7d752493dbd Mon Sep 17 00:00:00 2001 From: Marta Gomez Macias Date: Wed, 24 Dec 2025 00:06:11 +0000 Subject: [PATCH 5/5] add env cleanup --- .../test_sampling_profiler/test_live_collector_ui.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py index e619db73d76ea1..b492b471f820bf 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py @@ -24,6 +24,7 @@ MockThreadInfo, MockInterpreterInfo, ) +from .helpers import close_and_unlink class TestLiveStatsCollectorWithMockDisplay(unittest.TestCase): @@ -865,6 +866,7 @@ def test_run_failed_module_live(self): def test_run_failed_script_live(self): """Test that running a failing script exits with clean error.""" script = tempfile.NamedTemporaryFile(suffix=".py") + self.addCleanup(close_and_unlink, script) script.write(b'1/0\n') script.seek(0)