Skip to content

Commit 0ccea49

Browse files
committed
Address Laszlo's review
1 parent 81a8f2d commit 0ccea49

File tree

6 files changed

+103
-36
lines changed

6 files changed

+103
-36
lines changed

Doc/library/profiling.sampling.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1084,7 +1084,7 @@ The :option:`--compression` option controls data compression:
10841084

10851085
- ``auto`` (default): Use zstd compression if available, otherwise no
10861086
compression
1087-
- ``zstd``: Force zstd compression (requires zstd support)
1087+
- ``zstd``: Force zstd compression (requires :mod:`compression.zstd` support)
10881088
- ``none``: Disable compression
10891089

10901090
::

Lib/profiling/sampling/binary_collector.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import time
44

5+
import _remote_debugging
6+
57
from .collector import Collector
68

79
# Compression type constants (must match binary_io.h)
@@ -28,7 +30,6 @@ def _resolve_compression(compression):
2830
return COMPRESSION_ZSTD
2931
elif compression == 'auto':
3032
# Auto: use zstd if available, otherwise none
31-
import _remote_debugging
3233
if _remote_debugging.zstd_available():
3334
return COMPRESSION_ZSTD
3435
return COMPRESSION_NONE
@@ -57,8 +58,6 @@ def __init__(self, filename, sample_interval_usec, *, skip_idle=False,
5758
skip_idle: If True, skip idle threads (not used in binary format)
5859
compression: 'auto', 'zstd', 'none', or int (0=none, 1=zstd)
5960
"""
60-
import _remote_debugging
61-
6261
self.filename = filename
6362
self.sample_interval_usec = sample_interval_usec
6463
self.skip_idle = skip_idle

Lib/profiling/sampling/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .heatmap_collector import HeatmapCollector
1818
from .gecko_collector import GeckoCollector
1919
from .binary_collector import BinaryCollector
20-
from .binary_reader import BinaryReader, convert_binary_to_format
20+
from .binary_reader import BinaryReader
2121
from .constants import (
2222
PROFILING_MODE_ALL,
2323
PROFILING_MODE_WALL,

Modules/_remote_debugging/binary_io.h

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,37 @@ extern "C" {
2222
* ============================================================================ */
2323

2424
#define BINARY_FORMAT_MAGIC 0x54414348 /* "TACH" (Tachyon) */
25-
#define BINARY_FORMAT_VERSION 2
25+
#define BINARY_FORMAT_VERSION 3
26+
27+
/* Header field offsets and sizes */
28+
#define HDR_OFF_MAGIC 0
29+
#define HDR_SIZE_MAGIC 4
30+
#define HDR_OFF_VERSION (HDR_OFF_MAGIC + HDR_SIZE_MAGIC)
31+
#define HDR_SIZE_VERSION 4
32+
#define HDR_OFF_PY_VERSION (HDR_OFF_VERSION + HDR_SIZE_VERSION)
33+
#define HDR_SIZE_PY_VERSION 4 /* 3 bytes: major, minor, micro + 1 reserved */
34+
#define HDR_OFF_PY_MAJOR HDR_OFF_PY_VERSION
35+
#define HDR_OFF_PY_MINOR (HDR_OFF_PY_VERSION + 1)
36+
#define HDR_OFF_PY_MICRO (HDR_OFF_PY_VERSION + 2)
37+
#define HDR_OFF_START_TIME (HDR_OFF_PY_VERSION + HDR_SIZE_PY_VERSION)
38+
#define HDR_SIZE_START_TIME 8
39+
#define HDR_OFF_INTERVAL (HDR_OFF_START_TIME + HDR_SIZE_START_TIME)
40+
#define HDR_SIZE_INTERVAL 8
41+
#define HDR_OFF_SAMPLES (HDR_OFF_INTERVAL + HDR_SIZE_INTERVAL)
42+
#define HDR_SIZE_SAMPLES 4
43+
#define HDR_OFF_THREADS (HDR_OFF_SAMPLES + HDR_SIZE_SAMPLES)
44+
#define HDR_SIZE_THREADS 4
45+
#define HDR_OFF_STR_TABLE (HDR_OFF_THREADS + HDR_SIZE_THREADS)
46+
#define HDR_SIZE_STR_TABLE 8
47+
#define HDR_OFF_FRAME_TABLE (HDR_OFF_STR_TABLE + HDR_SIZE_STR_TABLE)
48+
#define HDR_SIZE_FRAME_TABLE 8
49+
#define HDR_OFF_COMPRESSION (HDR_OFF_FRAME_TABLE + HDR_SIZE_FRAME_TABLE)
50+
#define HDR_SIZE_COMPRESSION 4
51+
#define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION)
52+
#define FILE_HEADER_PLACEHOLDER_SIZE 64
53+
54+
static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE,
55+
"FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE");
2656

2757
/* Buffer sizes: 512KB balances syscall amortization against memory use,
2858
* and aligns well with filesystem block sizes and zstd dictionary windows */
@@ -253,6 +283,9 @@ typedef struct {
253283
size_t decompressed_size;
254284

255285
/* Header metadata */
286+
uint8_t py_major;
287+
uint8_t py_minor;
288+
uint8_t py_micro;
256289
uint64_t start_time_us;
257290
uint64_t sample_interval_us;
258291
uint32_t sample_count;
@@ -415,10 +448,22 @@ grow_array(void *ptr, size_t *capacity, size_t elem_size)
415448
return new_ptr;
416449
}
417450

418-
/* Macro wrapper for type safety with grow_array */
419-
#define GROW_ARRAY(ptr, count, capacity, type) \
420-
((count) < (capacity) ? 0 : \
421-
((ptr) = grow_array((ptr), &(capacity), sizeof(type))) ? 0 : -1)
451+
static inline int
452+
grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_size)
453+
{
454+
if (count < *capacity) {
455+
return 0;
456+
}
457+
void *tmp = grow_array(*ptr_addr, capacity, elem_size);
458+
if (tmp == NULL) {
459+
return -1;
460+
}
461+
*ptr_addr = tmp;
462+
return 0;
463+
}
464+
465+
#define GROW_ARRAY(ptr, count, cap, type) \
466+
grow_array_inplace((void**)&(ptr), (count), &(cap), sizeof(type))
422467

423468
/* ============================================================================
424469
* BINARY WRITER API

Modules/_remote_debugging/binary_io_reader.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222
* ============================================================================ */
2323

2424
/* File structure sizes */
25-
#define FILE_HEADER_PLACEHOLDER_SIZE 64 /* Placeholder written at file start */
26-
#define FILE_HEADER_SIZE 52 /* Actual header content size */
27-
#define FILE_FOOTER_SIZE 32 /* Footer size */
25+
#define FILE_FOOTER_SIZE 32
2826
#define MIN_DECOMPRESS_BUFFER_SIZE (64 * 1024) /* Minimum decompression buffer */
2927

3028
/* Progress callback frequency */
@@ -57,17 +55,35 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
5755
}
5856

5957
if (version != BINARY_FORMAT_VERSION) {
60-
PyErr_Format(PyExc_ValueError, "Unsupported version: %u", version);
58+
if (version > BINARY_FORMAT_VERSION && file_size >= HDR_OFF_PY_MICRO + 1) {
59+
/* Newer format - try to read Python version for better error */
60+
uint8_t py_major = data[HDR_OFF_PY_MAJOR];
61+
uint8_t py_minor = data[HDR_OFF_PY_MINOR];
62+
uint8_t py_micro = data[HDR_OFF_PY_MICRO];
63+
PyErr_Format(PyExc_ValueError,
64+
"Binary file was created with Python %u.%u.%u (format version %u), "
65+
"but this is Python %d.%d.%d (format version %d)",
66+
py_major, py_minor, py_micro, version,
67+
PY_MAJOR_VERSION, PY_MINOR_VERSION, PY_MICRO_VERSION,
68+
BINARY_FORMAT_VERSION);
69+
} else {
70+
PyErr_Format(PyExc_ValueError,
71+
"Unsupported format version %u (this reader supports version %d)",
72+
version, BINARY_FORMAT_VERSION);
73+
}
6174
return -1;
6275
}
6376

64-
memcpy(&reader->start_time_us, &data[8], sizeof(reader->start_time_us));
65-
memcpy(&reader->sample_interval_us, &data[16], sizeof(reader->sample_interval_us));
66-
memcpy(&reader->sample_count, &data[24], sizeof(reader->sample_count));
67-
memcpy(&reader->thread_count, &data[28], sizeof(reader->thread_count));
68-
memcpy(&reader->string_table_offset, &data[32], sizeof(reader->string_table_offset));
69-
memcpy(&reader->frame_table_offset, &data[40], sizeof(reader->frame_table_offset));
70-
memcpy(&reader->compression_type, &data[48], sizeof(reader->compression_type));
77+
reader->py_major = data[HDR_OFF_PY_MAJOR];
78+
reader->py_minor = data[HDR_OFF_PY_MINOR];
79+
reader->py_micro = data[HDR_OFF_PY_MICRO];
80+
memcpy(&reader->start_time_us, &data[HDR_OFF_START_TIME], HDR_SIZE_START_TIME);
81+
memcpy(&reader->sample_interval_us, &data[HDR_OFF_INTERVAL], HDR_SIZE_INTERVAL);
82+
memcpy(&reader->sample_count, &data[HDR_OFF_SAMPLES], HDR_SIZE_SAMPLES);
83+
memcpy(&reader->thread_count, &data[HDR_OFF_THREADS], HDR_SIZE_THREADS);
84+
memcpy(&reader->string_table_offset, &data[HDR_OFF_STR_TABLE], HDR_SIZE_STR_TABLE);
85+
memcpy(&reader->frame_table_offset, &data[HDR_OFF_FRAME_TABLE], HDR_SIZE_FRAME_TABLE);
86+
memcpy(&reader->compression_type, &data[HDR_OFF_COMPRESSION], HDR_SIZE_COMPRESSION);
7187

7288
return 0;
7389
}
@@ -1023,9 +1039,15 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
10231039
PyObject *
10241040
binary_reader_get_info(BinaryReader *reader)
10251041
{
1042+
PyObject *py_version = Py_BuildValue("(B,B,B)",
1043+
reader->py_major, reader->py_minor, reader->py_micro);
1044+
if (py_version == NULL) {
1045+
return NULL;
1046+
}
10261047
return Py_BuildValue(
1027-
"{s:I, s:K, s:K, s:I, s:I, s:I, s:I, s:i}",
1048+
"{s:I, s:N, s:K, s:K, s:I, s:I, s:I, s:I, s:i}",
10281049
"version", BINARY_FORMAT_VERSION,
1050+
"python_version", py_version,
10291051
"start_time_us", reader->start_time_us,
10301052
"sample_interval_us", reader->sample_interval_us,
10311053
"sample_count", reader->sample_count,

Modules/_remote_debugging/binary_io_writer.c

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@
3030
#define MAX_FRAME_BUFFER_SIZE ((MAX_STACK_DEPTH * MAX_VARINT_SIZE_U32) + MAX_VARINT_SIZE_U32 + 16)
3131

3232
/* File structure sizes */
33-
#define FILE_HEADER_PLACEHOLDER_SIZE 64 /* Placeholder written at file start */
34-
#define FILE_HEADER_SIZE 52 /* Actual header content size */
35-
#define FILE_FOOTER_SIZE 32 /* Footer size */
33+
#define FILE_FOOTER_SIZE 32
3634

3735
/* ============================================================================
3836
* WRITER-SPECIFIC UTILITY HELPERS
@@ -1073,19 +1071,22 @@ binary_writer_finalize(BinaryWriter *writer)
10731071
uint64_t string_table_offset_u64 = (uint64_t)string_table_offset;
10741072
uint64_t frame_table_offset_u64 = (uint64_t)frame_table_offset;
10751073

1076-
uint8_t header[52] = {0};
1074+
uint8_t header[FILE_HEADER_SIZE] = {0};
10771075
uint32_t magic = BINARY_FORMAT_MAGIC;
10781076
uint32_t version = BINARY_FORMAT_VERSION;
1079-
memcpy(header + 0, &magic, 4);
1080-
memcpy(header + 4, &version, 4);
1081-
memcpy(header + 8, &writer->start_time_us, 8);
1082-
memcpy(header + 16, &writer->sample_interval_us, 8);
1083-
memcpy(header + 24, &writer->total_samples, 4);
1084-
memcpy(header + 28, &writer->thread_count, 4);
1085-
memcpy(header + 32, &string_table_offset_u64, 8);
1086-
memcpy(header + 40, &frame_table_offset_u64, 8);
1087-
memcpy(header + 48, &writer->compression_type, 4);
1088-
if (fwrite_checked_allow_threads(header, 52, writer->fp) < 0) {
1077+
memcpy(header + HDR_OFF_MAGIC, &magic, HDR_SIZE_MAGIC);
1078+
memcpy(header + HDR_OFF_VERSION, &version, HDR_SIZE_VERSION);
1079+
header[HDR_OFF_PY_MAJOR] = PY_MAJOR_VERSION;
1080+
header[HDR_OFF_PY_MINOR] = PY_MINOR_VERSION;
1081+
header[HDR_OFF_PY_MICRO] = PY_MICRO_VERSION;
1082+
memcpy(header + HDR_OFF_START_TIME, &writer->start_time_us, HDR_SIZE_START_TIME);
1083+
memcpy(header + HDR_OFF_INTERVAL, &writer->sample_interval_us, HDR_SIZE_INTERVAL);
1084+
memcpy(header + HDR_OFF_SAMPLES, &writer->total_samples, HDR_SIZE_SAMPLES);
1085+
memcpy(header + HDR_OFF_THREADS, &writer->thread_count, HDR_SIZE_THREADS);
1086+
memcpy(header + HDR_OFF_STR_TABLE, &string_table_offset_u64, HDR_SIZE_STR_TABLE);
1087+
memcpy(header + HDR_OFF_FRAME_TABLE, &frame_table_offset_u64, HDR_SIZE_FRAME_TABLE);
1088+
memcpy(header + HDR_OFF_COMPRESSION, &writer->compression_type, HDR_SIZE_COMPRESSION);
1089+
if (fwrite_checked_allow_threads(header, FILE_HEADER_SIZE, writer->fp) < 0) {
10891090
return -1;
10901091
}
10911092

0 commit comments

Comments
 (0)