22
33import argparse
44import importlib .util
5+ import locale
56import os
67import selectors
78import socket
1617from .stack_collector import CollapsedStackCollector , FlamegraphCollector
1718from .heatmap_collector import HeatmapCollector
1819from .gecko_collector import GeckoCollector
20+ from .binary_collector import BinaryCollector
21+ from .binary_reader import BinaryReader
1922from .constants import (
2023 PROFILING_MODE_ALL ,
2124 PROFILING_MODE_WALL ,
@@ -75,6 +78,7 @@ class CustomFormatter(
7578 "flamegraph" : "html" ,
7679 "gecko" : "json" ,
7780 "heatmap" : "html" ,
81+ "binary" : "bin" ,
7882}
7983
8084COLLECTOR_MAP = {
@@ -83,6 +87,7 @@ class CustomFormatter(
8387 "flamegraph" : FlamegraphCollector ,
8488 "gecko" : GeckoCollector ,
8589 "heatmap" : HeatmapCollector ,
90+ "binary" : BinaryCollector ,
8691}
8792
8893def _setup_child_monitor (args , parent_pid ):
@@ -180,7 +185,7 @@ def _parse_mode(mode_string):
180185def _check_process_died (process ):
181186 """Check if process died and raise an error with stderr if available."""
182187 if process .poll () is None :
183- return # Process still running
188+ return
184189
185190 # Process died - try to get stderr for error message
186191 stderr_msg = ""
@@ -264,7 +269,6 @@ def _run_with_sync(original_cmd, suppress_output=False):
264269
265270 try :
266271 _wait_for_ready_signal (sync_sock , process , _SYNC_TIMEOUT )
267-
268272 except socket .timeout :
269273 # If we timeout, kill the process and raise an error
270274 if process .poll () is None :
@@ -368,7 +372,7 @@ def _add_mode_options(parser):
368372 )
369373
370374
371- def _add_format_options (parser ):
375+ def _add_format_options (parser , include_compression = True , include_binary = True ):
372376 """Add output format options to a parser."""
373377 output_group = parser .add_argument_group ("Output options" )
374378 format_group = output_group .add_mutually_exclusive_group ()
@@ -407,8 +411,24 @@ def _add_format_options(parser):
407411 dest = "format" ,
408412 help = "Generate interactive HTML heatmap visualization with line-level sample counts" ,
409413 )
414+ if include_binary :
415+ format_group .add_argument (
416+ "--binary" ,
417+ action = "store_const" ,
418+ const = "binary" ,
419+ dest = "format" ,
420+ help = "Generate high-performance binary format (use 'replay' command to convert)" ,
421+ )
410422 parser .set_defaults (format = "pstats" )
411423
424+ if include_compression :
425+ output_group .add_argument (
426+ "--compression" ,
427+ choices = ["auto" , "zstd" , "none" ],
428+ default = "auto" ,
429+ help = "Compression for binary format: auto (use zstd if available), zstd, none" ,
430+ )
431+
412432 output_group .add_argument (
413433 "-o" ,
414434 "--output" ,
@@ -463,15 +483,18 @@ def _sort_to_mode(sort_choice):
463483 return sort_map .get (sort_choice , SORT_MODE_NSAMPLES )
464484
465485
466- def _create_collector (format_type , interval , skip_idle , opcodes = False ):
486+ def _create_collector (format_type , interval , skip_idle , opcodes = False ,
487+ output_file = None , compression = 'auto' ):
467488 """Create the appropriate collector based on format type.
468489
469490 Args:
470- format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
491+ format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary' )
471492 interval: Sampling interval in microseconds
472493 skip_idle: Whether to skip idle samples
473494 opcodes: Whether to collect opcode information (only used by gecko format
474495 for creating interval markers in Firefox Profiler)
496+ output_file: Output file path (required for binary format)
497+ compression: Compression type for binary format ('auto', 'zstd', 'none')
475498
476499 Returns:
477500 A collector instance of the appropriate type
@@ -480,6 +503,13 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False):
480503 if collector_class is None :
481504 raise ValueError (f"Unknown format: { format_type } " )
482505
506+ # Binary format requires output file and compression
507+ if format_type == "binary" :
508+ if output_file is None :
509+ raise ValueError ("Binary format requires an output file" )
510+ return collector_class (output_file , interval , skip_idle = skip_idle ,
511+ compression = compression )
512+
483513 # Gecko format never skips idle (it needs both GIL and CPU data)
484514 # and is the only format that uses opcodes for interval markers
485515 if format_type == "gecko" :
@@ -515,7 +545,12 @@ def _handle_output(collector, args, pid, mode):
515545 pid: Process ID (for generating filenames)
516546 mode: Profiling mode used
517547 """
518- if args .format == "pstats" :
548+ if args .format == "binary" :
549+ # Binary format already wrote to file incrementally, just finalize
550+ collector .export (None )
551+ filename = collector .filename
552+ print (f"Binary profile written to { filename } ({ collector .total_samples } samples)" )
553+ elif args .format == "pstats" :
519554 if args .outfile :
520555 # If outfile is a directory, generate filename inside it
521556 if os .path .isdir (args .outfile ):
@@ -548,6 +583,10 @@ def _validate_args(args, parser):
548583 args: Parsed command-line arguments
549584 parser: ArgumentParser instance for error reporting
550585 """
586+ # Replay command has no special validation needed
587+ if getattr (args , 'command' , None ) == "replay" :
588+ return
589+
551590 # Warn about blocking mode with aggressive sampling intervals
552591 if args .blocking and args .interval < 100 :
553592 print (
@@ -569,7 +608,7 @@ def _validate_args(args, parser):
569608 parser .error ("--subprocesses is incompatible with --live mode." )
570609
571610 # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads
572- if args . async_aware :
611+ if getattr ( args , ' async_aware' , False ) :
573612 issues = []
574613 if args .native :
575614 issues .append ("--native" )
@@ -586,7 +625,7 @@ def _validate_args(args, parser):
586625 )
587626
588627 # --async-mode requires --async-aware
589- if hasattr (args , 'async_mode' ) and args .async_mode != "running" and not args . async_aware :
628+ if hasattr (args , 'async_mode' ) and args .async_mode != "running" and not getattr ( args , ' async_aware' , False ) :
590629 parser .error ("--async-mode requires --async-aware to be enabled." )
591630
592631 # Live mode is incompatible with format options
@@ -614,15 +653,15 @@ def _validate_args(args, parser):
614653 return
615654
616655 # Validate gecko mode doesn't use non-wall mode
617- if args .format == "gecko" and args . mode != "wall" :
656+ if args .format == "gecko" and getattr ( args , ' mode' , 'wall' ) != "wall" :
618657 parser .error (
619658 "--mode option is incompatible with --gecko. "
620659 "Gecko format automatically includes both GIL-holding and CPU status analysis."
621660 )
622661
623662 # Validate --opcodes is only used with compatible formats
624663 opcodes_compatible_formats = ("live" , "gecko" , "flamegraph" , "heatmap" )
625- if args . opcodes and args .format not in opcodes_compatible_formats :
664+ if getattr ( args , ' opcodes' , False ) and args .format not in opcodes_compatible_formats :
626665 parser .error (
627666 f"--opcodes is only compatible with { ', ' .join ('--' + f for f in opcodes_compatible_formats )} ."
628667 )
@@ -646,6 +685,16 @@ def _validate_args(args, parser):
646685
def main():
    """Main entry point for the CLI.

    Switches the process to the user's default locale for the duration of
    the run and restores the previously active locale on exit, whether
    ``_main()`` returns normally or raises.
    """
    # Querying with None returns the current setting without changing it.
    previous_locale = locale.setlocale(locale.LC_ALL, None)
    try:
        locale.setlocale(locale.LC_ALL, "")
    except locale.Error:
        # The environment names a locale this system does not support
        # (e.g. a stale LANG/LC_ALL value). The locale is left unchanged
        # so the tool stays usable instead of aborting before argument
        # parsing.
        pass
    try:
        _main()
    finally:
        locale.setlocale(locale.LC_ALL, previous_locale)
695+
696+
697+ def _main ():
649698 # Create the main parser
650699 parser = argparse .ArgumentParser (
651700 description = _HELP_DESCRIPTION ,
@@ -734,6 +783,30 @@ def main():
734783 _add_format_options (attach_parser )
735784 _add_pstats_options (attach_parser )
736785
786+ # === REPLAY COMMAND ===
787+ replay_parser = subparsers .add_parser (
788+ "replay" ,
789+ help = "Replay a binary profile and convert to another format" ,
790+ formatter_class = CustomFormatter ,
791+ description = """Replay a binary profile file and convert to another format
792+
793+ Examples:
794+ # Convert binary to flamegraph
795+ `python -m profiling.sampling replay --flamegraph -o output.html profile.bin`
796+
797+ # Convert binary to pstats and print to stdout
798+ `python -m profiling.sampling replay profile.bin`
799+
800+ # Convert binary to gecko format
801+ `python -m profiling.sampling replay --gecko -o profile.json profile.bin`""" ,
802+ )
803+ replay_parser .add_argument (
804+ "input_file" ,
805+ help = "Binary profile file to replay" ,
806+ )
807+ _add_format_options (replay_parser , include_compression = False , include_binary = False )
808+ _add_pstats_options (replay_parser )
809+
737810 # Parse arguments
738811 args = parser .parse_args ()
739812
@@ -744,6 +817,7 @@ def main():
744817 command_handlers = {
745818 "run" : _handle_run ,
746819 "attach" : _handle_attach ,
820+ "replay" : _handle_replay ,
747821 }
748822
749823 # Execute the appropriate command
@@ -775,8 +849,16 @@ def _handle_attach(args):
775849 mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
776850 )
777851
852+ output_file = None
853+ if args .format == "binary" :
854+ output_file = args .outfile or _generate_output_filename (args .format , args .pid )
855+
778856 # Create the appropriate collector
779- collector = _create_collector (args .format , args .interval , skip_idle , args .opcodes )
857+ collector = _create_collector (
858+ args .format , args .interval , skip_idle , args .opcodes ,
859+ output_file = output_file ,
860+ compression = getattr (args , 'compression' , 'auto' )
861+ )
780862
781863 with _get_child_monitor_context (args , args .pid ):
782864 collector = sample (
@@ -845,8 +927,16 @@ def _handle_run(args):
845927 mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
846928 )
847929
930+ output_file = None
931+ if args .format == "binary" :
932+ output_file = args .outfile or _generate_output_filename (args .format , process .pid )
933+
848934 # Create the appropriate collector
849- collector = _create_collector (args .format , args .interval , skip_idle , args .opcodes )
935+ collector = _create_collector (
936+ args .format , args .interval , skip_idle , args .opcodes ,
937+ output_file = output_file ,
938+ compression = getattr (args , 'compression' , 'auto' )
939+ )
850940
851941 with _get_child_monitor_context (args , process .pid ):
852942 try :
@@ -980,5 +1070,48 @@ def _handle_live_run(args):
9801070 pass
9811071
9821072
def _progress_bar_glyphs(stream):
    """Return ``(filled, empty)`` bar characters that *stream* can encode.

    Falls back to plain ASCII when the stream's encoding is unknown or
    cannot represent the block-drawing characters, so printing the bar
    never raises UnicodeEncodeError.
    """
    encoding = getattr(stream, "encoding", None) or "ascii"
    try:
        "█░".encode(encoding)
    except (UnicodeEncodeError, LookupError):
        return "#", "-"
    return "█", "░"


def _handle_replay(args):
    """Handle the 'replay' command - convert binary profile to another format.

    Args:
        args: Parsed command-line arguments. Reads ``input_file``,
            ``format`` and ``outfile``; for pstats output printed to the
            terminal it also reads ``sort``, ``limit`` and ``no_summary``.

    Exits the process with an error message if ``args.input_file`` does
    not exist.
    """
    if not os.path.exists(args.input_file):
        sys.exit(f"Error: Input file not found: {args.input_file}")

    with BinaryReader(args.input_file) as reader:
        info = reader.get_info()
        interval = info['sample_interval_us']

        print(f"Replaying {info['sample_count']} samples from {args.input_file}")
        print(f"  Sample interval: {interval} us")
        print(f"  Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}")

        # Replay feeds every recorded sample to the collector, so idle
        # skipping is explicitly disabled here.
        collector = _create_collector(args.format, interval, skip_idle=False)

        bar_width = 40
        filled_char, empty_char = _progress_bar_glyphs(sys.stdout)

        def progress_callback(current, total):
            # Nothing meaningful to draw (and no division) for an empty total.
            if total > 0:
                pct = current / total
                filled = int(bar_width * pct)
                bar = filled_char * filled + empty_char * (bar_width - filled)
                print(f"\r  [{bar}] {pct*100:5.1f}% ({current:,}/{total:,})", end="", flush=True)

        count = reader.replay_samples(collector, progress_callback)
        # Terminate the in-place progress line.
        print()

        if args.format == "pstats":
            if args.outfile:
                collector.export(args.outfile)
            else:
                sort_choice = args.sort if args.sort is not None else "nsamples"
                limit = args.limit if args.limit is not None else 15
                sort_mode = _sort_to_mode(sort_choice)
                collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL)
        else:
            # No live target process exists during replay, so this
            # process's own PID is used for the generated filename.
            filename = args.outfile or _generate_output_filename(args.format, os.getpid())
            collector.export(filename)

        print(f"Replayed {count} samples")
1114+
1115+
9831116if __name__ == "__main__" :
9841117 main ()
0 commit comments