Skip to content

Commit c880f85

Browse files
committed
Fix big endian
1 parent ed63156 commit c880f85

File tree

4 files changed

+95
-22
lines changed

4 files changed

+95
-22
lines changed

InternalDocs/profiling_binary_format.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ profiler data. This document describes the format's structure and the
55
design decisions behind it.
66

77
The implementation is in
8-
[`Modules/_remote_debugging/binary_io.c`](../Modules/_remote_debugging/binary_io.c)
8+
[`Modules/_remote_debugging/binary_io_writer.c`](../Modules/_remote_debugging/binary_io_writer.c)
9+
and [`Modules/_remote_debugging/binary_io_reader.c`](../Modules/_remote_debugging/binary_io_reader.c),
910
with declarations in
1011
[`Modules/_remote_debugging/binary_io.h`](../Modules/_remote_debugging/binary_io.h).
1112

@@ -412,6 +413,36 @@ index-to-value mapping, not value-to-index.
412413

413414
## Platform Considerations
414415

416+
### Byte Ordering and Cross-Platform Portability
417+
418+
The binary format uses **native byte order** for all multi-byte integer
419+
fields when writing. However, the reader supports **cross-endian reading**:
420+
files written on a little-endian system (x86, ARM) can be read on a
421+
big-endian system (s390x, PowerPC), and vice versa.
422+
423+
**Endianness Detection**: The magic number serves as an endianness marker.
424+
When read on a system with different byte order, it appears byte-swapped
425+
(`0x48434154` instead of `0x54414348`). The reader detects this and
426+
automatically byte-swaps all fixed-width integer fields during parsing.
427+
428+
**Writer Requirements**: Fixed-width integer fields must be written using
429+
`memcpy()` from properly-sized integer types. When the source variable's
430+
type differs from the field width (e.g., `size_t` being written as 4 bytes),
431+
explicit casting to the correct type (e.g., `uint32_t`) is required before
432+
`memcpy()`. On big-endian systems, copying from an oversized type would
433+
copy the wrong bytes (high-order zeros instead of the actual value).
434+
435+
**Reader Implementation**: The reader tracks whether byte-swapping is needed
436+
via a `needs_swap` flag set during header parsing. All fixed-width fields
437+
in the header, footer, and sample data are conditionally byte-swapped using
438+
inline swap functions (`bswap32`, `bswap64`).
439+
440+
Variable-length integers (varints) are byte-order independent since they
441+
encode values one byte at a time using the LEB128 scheme, so they require
442+
no special handling for cross-endian reading.
443+
444+
### Memory-Mapped I/O
445+
415446
On Unix systems (Linux, macOS), the reader uses `mmap()` to map the file
416447
into the process address space. The kernel handles paging data in and out
417448
as needed, no explicit read() calls or buffer management are required,

Modules/_remote_debugging/binary_io.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,16 @@ extern "C" {
2121
* BINARY FORMAT CONSTANTS
2222
* ============================================================================ */
2323

24-
#define BINARY_FORMAT_MAGIC 0x54414348 /* "TACH" (Tachyon) */
24+
#define BINARY_FORMAT_MAGIC 0x54414348 /* "TACH" (Tachyon) in native byte order */
25+
#define BINARY_FORMAT_MAGIC_SWAPPED 0x48434154 /* Byte-swapped magic for endianness detection */
2526
#define BINARY_FORMAT_VERSION 1
2627

28+
/* Conditional byte-swap macros for cross-endian file reading.
29+
* Uses Python's optimized byte-swap functions from pycore_bitutils.h */
30+
#define SWAP16_IF(swap, x) ((swap) ? _Py_bswap16(x) : (x))
31+
#define SWAP32_IF(swap, x) ((swap) ? _Py_bswap32(x) : (x))
32+
#define SWAP64_IF(swap, x) ((swap) ? _Py_bswap64(x) : (x))
33+
2734
/* Header field offsets and sizes */
2835
#define HDR_OFF_MAGIC 0
2936
#define HDR_SIZE_MAGIC 4
@@ -286,6 +293,7 @@ typedef struct {
286293
uint8_t py_major;
287294
uint8_t py_minor;
288295
uint8_t py_micro;
296+
int needs_swap; /* Non-zero if file was written on different-endian system */
289297
uint64_t start_time_us;
290298
uint64_t sample_interval_us;
291299
uint32_t sample_count;

Modules/_remote_debugging/binary_io_reader.c

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "binary_io.h"
1313
#include "_remote_debugging.h"
14+
#include "pycore_bitutils.h" /* _Py_bswap32, _Py_bswap64 for cross-endian reading */
1415
#include <string.h>
1516

1617
#ifdef HAVE_ZSTD
@@ -49,7 +50,13 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
4950
memcpy(&magic, &data[0], sizeof(magic));
5051
memcpy(&version, &data[4], sizeof(version));
5152

52-
if (magic != BINARY_FORMAT_MAGIC) {
53+
/* Detect endianness from magic number */
54+
if (magic == BINARY_FORMAT_MAGIC) {
55+
reader->needs_swap = 0;
56+
} else if (magic == BINARY_FORMAT_MAGIC_SWAPPED) {
57+
reader->needs_swap = 1;
58+
version = _Py_bswap32(version);
59+
} else {
5360
PyErr_Format(PyExc_ValueError, "Invalid magic number: 0x%08x", magic);
5461
return -1;
5562
}
@@ -77,13 +84,26 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
7784
reader->py_major = data[HDR_OFF_PY_MAJOR];
7885
reader->py_minor = data[HDR_OFF_PY_MINOR];
7986
reader->py_micro = data[HDR_OFF_PY_MICRO];
80-
memcpy(&reader->start_time_us, &data[HDR_OFF_START_TIME], HDR_SIZE_START_TIME);
81-
memcpy(&reader->sample_interval_us, &data[HDR_OFF_INTERVAL], HDR_SIZE_INTERVAL);
82-
memcpy(&reader->sample_count, &data[HDR_OFF_SAMPLES], HDR_SIZE_SAMPLES);
83-
memcpy(&reader->thread_count, &data[HDR_OFF_THREADS], HDR_SIZE_THREADS);
84-
memcpy(&reader->string_table_offset, &data[HDR_OFF_STR_TABLE], HDR_SIZE_STR_TABLE);
85-
memcpy(&reader->frame_table_offset, &data[HDR_OFF_FRAME_TABLE], HDR_SIZE_FRAME_TABLE);
86-
memcpy(&reader->compression_type, &data[HDR_OFF_COMPRESSION], HDR_SIZE_COMPRESSION);
87+
88+
/* Read header fields with byte-swapping if needed */
89+
uint64_t start_time_us, sample_interval_us, string_table_offset, frame_table_offset;
90+
uint32_t sample_count, thread_count, compression_type;
91+
92+
memcpy(&start_time_us, &data[HDR_OFF_START_TIME], HDR_SIZE_START_TIME);
93+
memcpy(&sample_interval_us, &data[HDR_OFF_INTERVAL], HDR_SIZE_INTERVAL);
94+
memcpy(&sample_count, &data[HDR_OFF_SAMPLES], HDR_SIZE_SAMPLES);
95+
memcpy(&thread_count, &data[HDR_OFF_THREADS], HDR_SIZE_THREADS);
96+
memcpy(&string_table_offset, &data[HDR_OFF_STR_TABLE], HDR_SIZE_STR_TABLE);
97+
memcpy(&frame_table_offset, &data[HDR_OFF_FRAME_TABLE], HDR_SIZE_FRAME_TABLE);
98+
memcpy(&compression_type, &data[HDR_OFF_COMPRESSION], HDR_SIZE_COMPRESSION);
99+
100+
reader->start_time_us = SWAP64_IF(reader->needs_swap, start_time_us);
101+
reader->sample_interval_us = SWAP64_IF(reader->needs_swap, sample_interval_us);
102+
reader->sample_count = SWAP32_IF(reader->needs_swap, sample_count);
103+
reader->thread_count = SWAP32_IF(reader->needs_swap, thread_count);
104+
reader->string_table_offset = SWAP64_IF(reader->needs_swap, string_table_offset);
105+
reader->frame_table_offset = SWAP64_IF(reader->needs_swap, frame_table_offset);
106+
reader->compression_type = (int)SWAP32_IF(reader->needs_swap, compression_type);
87107

88108
return 0;
89109
}
@@ -98,8 +118,12 @@ reader_parse_footer(BinaryReader *reader, const uint8_t *data, size_t file_size)
98118

99119
const uint8_t *footer = data + file_size - FILE_FOOTER_SIZE;
100120
/* Use memcpy to avoid strict aliasing violations */
101-
memcpy(&reader->strings_count, &footer[0], sizeof(reader->strings_count));
102-
memcpy(&reader->frames_count, &footer[4], sizeof(reader->frames_count));
121+
uint32_t strings_count, frames_count;
122+
memcpy(&strings_count, &footer[0], sizeof(strings_count));
123+
memcpy(&frames_count, &footer[4], sizeof(frames_count));
124+
125+
reader->strings_count = SWAP32_IF(reader->needs_swap, strings_count);
126+
reader->frames_count = SWAP32_IF(reader->needs_swap, frames_count);
103127

104128
return 0;
105129
}
@@ -857,15 +881,18 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
857881
break; /* End of data */
858882
}
859883

860-
/* Use memcpy to avoid strict aliasing violations */
861-
uint64_t thread_id;
862-
uint32_t interpreter_id;
863-
memcpy(&thread_id, &reader->sample_data[offset], sizeof(thread_id));
884+
/* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
885+
uint64_t thread_id_raw;
886+
uint32_t interpreter_id_raw;
887+
memcpy(&thread_id_raw, &reader->sample_data[offset], sizeof(thread_id_raw));
864888
offset += 8;
865889

866-
memcpy(&interpreter_id, &reader->sample_data[offset], sizeof(interpreter_id));
890+
memcpy(&interpreter_id_raw, &reader->sample_data[offset], sizeof(interpreter_id_raw));
867891
offset += 4;
868892

893+
uint64_t thread_id = SWAP64_IF(reader->needs_swap, thread_id_raw);
894+
uint32_t interpreter_id = SWAP32_IF(reader->needs_swap, interpreter_id_raw);
895+
869896
/* Get or create thread state for reconstruction */
870897
ReaderThreadState *ts = reader_get_or_create_thread_state(reader, thread_id, interpreter_id);
871898
if (!ts) {

Modules/_remote_debugging/binary_io_writer.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,8 +1055,12 @@ binary_writer_finalize(BinaryWriter *writer)
10551055
}
10561056
uint64_t file_size = (uint64_t)footer_offset + 32;
10571057
uint8_t footer[32] = {0};
1058-
memcpy(footer + 0, &writer->string_count, 4);
1059-
memcpy(footer + 4, &writer->frame_count, 4);
1058+
/* Cast size_t to uint32_t before memcpy to ensure correct bytes are copied
1059+
* on both little-endian and big-endian systems (size_t is 8 bytes on 64-bit) */
1060+
uint32_t string_count_u32 = (uint32_t)writer->string_count;
1061+
uint32_t frame_count_u32 = (uint32_t)writer->frame_count;
1062+
memcpy(footer + 0, &string_count_u32, 4);
1063+
memcpy(footer + 4, &frame_count_u32, 4);
10601064
memcpy(footer + 8, &file_size, 8);
10611065
/* bytes 16-31: checksum placeholder (zeros) */
10621066
if (fwrite_checked_allow_threads(footer, 32, writer->fp) < 0) {
@@ -1068,9 +1072,12 @@ binary_writer_finalize(BinaryWriter *writer)
10681072
return -1;
10691073
}
10701074

1071-
/* Convert file offsets to uint64_t for portable header format */
1075+
/* Convert file offsets and counts to fixed-width types for portable header format.
1076+
* This ensures correct behavior on both little-endian and big-endian systems. */
10721077
uint64_t string_table_offset_u64 = (uint64_t)string_table_offset;
10731078
uint64_t frame_table_offset_u64 = (uint64_t)frame_table_offset;
1079+
uint32_t thread_count_u32 = (uint32_t)writer->thread_count;
1080+
uint32_t compression_type_u32 = (uint32_t)writer->compression_type;
10741081

10751082
uint8_t header[FILE_HEADER_SIZE] = {0};
10761083
uint32_t magic = BINARY_FORMAT_MAGIC;
@@ -1083,10 +1090,10 @@ binary_writer_finalize(BinaryWriter *writer)
10831090
memcpy(header + HDR_OFF_START_TIME, &writer->start_time_us, HDR_SIZE_START_TIME);
10841091
memcpy(header + HDR_OFF_INTERVAL, &writer->sample_interval_us, HDR_SIZE_INTERVAL);
10851092
memcpy(header + HDR_OFF_SAMPLES, &writer->total_samples, HDR_SIZE_SAMPLES);
1086-
memcpy(header + HDR_OFF_THREADS, &writer->thread_count, HDR_SIZE_THREADS);
1093+
memcpy(header + HDR_OFF_THREADS, &thread_count_u32, HDR_SIZE_THREADS);
10871094
memcpy(header + HDR_OFF_STR_TABLE, &string_table_offset_u64, HDR_SIZE_STR_TABLE);
10881095
memcpy(header + HDR_OFF_FRAME_TABLE, &frame_table_offset_u64, HDR_SIZE_FRAME_TABLE);
1089-
memcpy(header + HDR_OFF_COMPRESSION, &writer->compression_type, HDR_SIZE_COMPRESSION);
1096+
memcpy(header + HDR_OFF_COMPRESSION, &compression_type_u32, HDR_SIZE_COMPRESSION);
10901097
if (fwrite_checked_allow_threads(header, FILE_HEADER_SIZE, writer->fp) < 0) {
10911098
return -1;
10921099
}

0 commit comments

Comments
 (0)