diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f40af174..e804a9b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,12 @@ jobs: - name: Checkout uses: actions/checkout@v4 + # actions/checkout sets safe.directory under a temporary HOME that is + # reverted when the step ends, so subsequent steps run as root see the + # workspace as "not a git repository". Re-add it to the persistent config. + - name: Mark workspace as safe for git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - name: Install Zig uses: mlugg/setup-zig@v2 with: diff --git a/src/api/load.zig b/src/api/load.zig index ef0b94ca..796c7e8b 100644 --- a/src/api/load.zig +++ b/src/api/load.zig @@ -76,6 +76,7 @@ pub fn loadStreamWithOptions(allocator: std.mem.Allocator, input: []const u8, op var load_failure: loader.LoadFailure = .unknown; const documents = loader.loadStreamFromEventsWithFailure( arena.allocator(), + allocator, event_stream.events, options.schema, options.duplicate_key_behavior, @@ -103,11 +104,7 @@ pub fn loadStreamWithOptions(allocator: std.mem.Allocator, input: []const u8, op fn loadStreamFastPath(allocator: std.mem.Allocator, input: []const u8, options: LoadOptions) Error!?LoadedStream { if (std.mem.indexOfScalar(u8, input, '*') != null) return null; - var arena = std.heap.ArenaAllocator.init(allocator); - errdefer arena.deinit(); - const arena_allocator = arena.allocator(); - - const event_stream = try parse.parseEventsWithOptions(arena_allocator, input, .{ + var event_stream = try parse.parseEventsWithOptions(allocator, input, .{ .max_input_bytes = options.max_input_bytes, .max_event_count = options.max_event_count, .max_token_count = options.max_token_count, @@ -115,10 +112,16 @@ fn loadStreamFastPath(allocator: std.mem.Allocator, input: []const u8, options: .max_scalar_bytes = options.max_scalar_bytes, .diagnostic = options.diagnostic, }); + defer event_stream.deinit(); + + var arena = 
std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + const arena_allocator = arena.allocator(); var load_failure: loader.LoadFailure = .unknown; const documents = loader.loadStreamFromEventsWithFailure( arena_allocator, + allocator, event_stream.events, options.schema, options.duplicate_key_behavior, @@ -127,7 +130,7 @@ fn loadStreamFastPath(allocator: std.mem.Allocator, input: []const u8, options: options.max_alias_expansion, options.max_document_count, &load_failure, - false, + true, ) catch |err| { if (options.diagnostic) |diagnostic| { if (diagnostic.message.len == 0) { diff --git a/src/loader/construct.zig b/src/loader/construct.zig index d991e7f9..eee34d71 100644 --- a/src/loader/construct.zig +++ b/src/loader/construct.zig @@ -36,6 +36,7 @@ pub fn constructStream( unknown_tag_behavior: UnknownTagBehavior, ) Error![]const *const Node { return constructStreamWithFailure( + allocator, allocator, documents, selected_schema, @@ -51,6 +52,7 @@ pub fn constructStream( /// copied into `allocator` so loaded values outlive parser event storage. 
pub fn constructStreamWithFailure( allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, documents: []const *const graph.Node, selected_schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, @@ -60,6 +62,7 @@ pub fn constructStreamWithFailure( ) Error![]const *const Node { var constructor: Constructor = .{ .allocator = allocator, + .temporary_allocator = temporary_allocator, .schema = selected_schema, .duplicate_key_behavior = duplicate_key_behavior, .unknown_tag_behavior = unknown_tag_behavior, @@ -71,6 +74,7 @@ pub fn constructStreamWithFailure( const Constructor = struct { allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, unknown_tag_behavior: UnknownTagBehavior, @@ -197,12 +201,12 @@ const Constructor = struct { .tag = try copyOptionalSlice(self.allocator, mapping.tag), } }; if (tag.isStandardSetTag(mapping.tag)) { - duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| { + duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| { self.recordFailure(.invalid_standard_tag); return err; }; } else if (self.duplicate_key_behavior == .reject) { - duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| { + duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| { self.recordFailure(.duplicate_key); return err; }; diff --git a/src/loader/direct.zig b/src/loader/direct.zig index 29fcdd15..a31850b3 100644 --- a/src/loader/direct.zig +++ b/src/loader/direct.zig @@ -45,6 +45,7 @@ pub fn loadStreamFromEvents( const summary = limit.summarizeEvents(events); try limit.checkSummary(summary, .{ .max_document_count = max_document_count }, load_failure); return loadStreamFromEventsWithSummary( + allocator, allocator, events, selected_schema, @@ -57,6 +58,7 @@ pub fn loadStreamFromEvents( pub fn loadStreamFromEventsWithSummary( allocator: 
std.mem.Allocator, + temporary_allocator: std.mem.Allocator, events: []const Event, selected_schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, @@ -66,6 +68,7 @@ pub fn loadStreamFromEventsWithSummary( ) Error![]const *const Node { return loadStreamFromEventsWithStringPolicy( allocator, + temporary_allocator, events, selected_schema, duplicate_key_behavior, @@ -87,6 +90,7 @@ pub fn loadStreamFromEventsBorrowingStringsWithSummary( load_failure: ?*LoadFailure, ) Error![]const *const Node { return loadStreamFromEventsWithStringPolicy( + allocator, allocator, events, selected_schema, @@ -100,6 +104,7 @@ pub fn loadStreamFromEventsBorrowingStringsWithSummary( pub fn loadStreamFromEventsWithStringPolicy( allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, events: []const Event, selected_schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, @@ -109,9 +114,17 @@ pub fn loadStreamFromEventsWithStringPolicy( copy_strings: bool, ) Error![]const *const Node { if (summary.has_aliases) return ParseError.Unsupported; + const collection_child_counts = buildDirectCollectionChildCounts(temporary_allocator, events) catch |err| { + if (err == ParseError.InvalidSyntax) recordFailure(load_failure, .invalid_graph); + return err; + }; + defer if (collection_child_counts.len > 0) temporary_allocator.free(collection_child_counts); + var loader: DirectLoader = .{ .allocator = allocator, + .temporary_allocator = temporary_allocator, .events = events, + .collection_child_counts = collection_child_counts, .schema = selected_schema, .duplicate_key_behavior = duplicate_key_behavior, .unknown_tag_behavior = unknown_tag_behavior, @@ -125,7 +138,10 @@ pub fn loadStreamFromEventsWithStringPolicy( const DirectLoader = struct { allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, events: []const Event, + collection_child_counts: []const usize, + collection_child_count_index: usize = 0, schema: Schema, duplicate_key_behavior: 
DuplicateKeyBehavior, unknown_tag_behavior: UnknownTagBehavior, @@ -177,8 +193,8 @@ const DirectLoader = struct { return switch (current) { .scalar => |scalar| self.constructScalar(scalar), - .sequence_start => |collection| self.constructSequence(collection), - .mapping_start => |collection| self.constructMapping(collection), + .sequence_start => |collection| self.constructSequence(try self.nextCollectionChildCount(), collection), + .mapping_start => |collection| self.constructMapping(try self.nextCollectionChildCount(), collection), .alias => ParseError.Unsupported, else => self.invalidGraph(), }; @@ -208,13 +224,13 @@ const DirectLoader = struct { return node; } - fn constructSequence(self: *DirectLoader, collection: parser_event.CollectionStart) Error!*const Node { + fn constructSequence(self: *DirectLoader, child_count: usize, collection: parser_event.CollectionStart) Error!*const Node { const node = try self.nodes.create(); try self.validateTag(collection.tag, .sequence); var items: std.ArrayList(*const Node) = .empty; errdefer items.deinit(self.allocator); - try items.ensureTotalCapacity(self.allocator, countDirectCollectionNodes(self.events, self.index, false)); + try items.ensureTotalCapacity(self.allocator, child_count); while (self.index < self.events.len and self.events[self.index] != .sequence_end) { try items.append(self.allocator, try self.constructNode()); @@ -240,13 +256,13 @@ const DirectLoader = struct { return node; } - fn constructMapping(self: *DirectLoader, collection: parser_event.CollectionStart) Error!*const Node { + fn constructMapping(self: *DirectLoader, child_count: usize, collection: parser_event.CollectionStart) Error!*const Node { const node = try self.nodes.create(); try self.validateTag(collection.tag, .mapping); var pairs: std.ArrayList(MappingPair) = .empty; errdefer pairs.deinit(self.allocator); - try pairs.ensureTotalCapacity(self.allocator, countDirectCollectionNodes(self.events, self.index, true) / 2); + try 
pairs.ensureTotalCapacity(self.allocator, child_count / 2); while (self.index < self.events.len and self.events[self.index] != .mapping_end) { const key = try self.constructNode(); @@ -268,12 +284,12 @@ const DirectLoader = struct { } }; if (tag.isStandardSetTag(collection.tag)) { try self.validateStandardSetContent(owned_pairs); - duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| { + duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| { self.recordFailure(.invalid_standard_tag); return err; }; } else if (self.duplicate_key_behavior == .reject) { - duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| { + duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| { self.recordFailure(.duplicate_key); return err; }; @@ -360,42 +376,73 @@ const DirectLoader = struct { fn retainOptionalSlice(self: *DirectLoader, maybe_value: ?[]const u8) std.mem.Allocator.Error!?[]const u8 { return if (maybe_value) |slice| try self.retainSlice(slice) else null; } + + fn nextCollectionChildCount(self: *DirectLoader) Error!usize { + if (self.collection_child_count_index >= self.collection_child_counts.len) return self.invalidGraph(); + const child_count = self.collection_child_counts[self.collection_child_count_index]; + self.collection_child_count_index += 1; + return child_count; + } }; -fn isSetNullValue(node: *const Node) bool { - return switch (node.*) { - .null_value => true, - .scalar => |scalar| schema.isCoreNullScalar(scalar.value, scalar.style == .plain, scalar.tag), - else => false, - }; -} +fn buildDirectCollectionChildCounts(allocator: std.mem.Allocator, events: []const Event) Error![]usize { + const collection_count = countCollectionStarts(events); + if (collection_count == 0) return &[_]usize{}; -fn countDirectCollectionNodes(events: []const Event, start: usize, mapping: bool) usize { - var count: usize = 0; - var depth: usize = 0; - var index = 
start; - while (index < events.len) : (index += 1) { - const event_value = events[index]; - if (depth == 0 and collectionEnded(event_value, mapping)) return count; - if (depth == 0 and eventStartsNode(event_value)) count += 1; + const counts = try allocator.alloc(usize, collection_count); + errdefer allocator.free(counts); + @memset(counts, 0); + + const stack = try allocator.alloc(usize, collection_count); + defer allocator.free(stack); + + var stack_len: usize = 0; + var next_collection: usize = 0; + for (events) |event_value| { switch (event_value) { - .sequence_start, .mapping_start => depth += 1, + .sequence_start, .mapping_start => { + if (stack_len > 0) counts[stack[stack_len - 1]] += 1; + stack[stack_len] = next_collection; + stack_len += 1; + next_collection += 1; + }, + .scalar, .alias => { + if (stack_len > 0) counts[stack[stack_len - 1]] += 1; + }, .sequence_end, .mapping_end => { - if (depth > 0) depth -= 1; + if (stack_len == 0) return ParseError.InvalidSyntax; + stack_len -= 1; }, else => {}, } } + + if (stack_len != 0) return ParseError.InvalidSyntax; + if (next_collection != collection_count) return ParseError.InvalidSyntax; + return counts; +} + +fn countCollectionStarts(events: []const Event) usize { + var count: usize = 0; + for (events) |event_value| { + switch (event_value) { + .sequence_start, .mapping_start => count += 1, + else => {}, + } + } return count; } -fn collectionEnded(event_value: Event, mapping: bool) bool { - return if (mapping) event_value == .mapping_end else event_value == .sequence_end; +fn recordFailure(load_failure: ?*LoadFailure, load_failure_value: LoadFailure) void { + if (load_failure) |target| { + if (target.* == .unknown) target.* = load_failure_value; + } } -fn eventStartsNode(event_value: Event) bool { - return switch (event_value) { - .scalar, .sequence_start, .mapping_start => true, +fn isSetNullValue(node: *const Node) bool { + return switch (node.*) { + .null_value => true, + .scalar => |scalar| 
schema.isCoreNullScalar(scalar.value, scalar.style == .plain, scalar.tag), else => false, }; } @@ -403,3 +450,29 @@ fn eventStartsNode(event_value: Event) bool { test { std.testing.refAllDecls(@This()); } + +test "direct loader precomputes collection child counts" { + const events = [_]Event{ + .stream_start, + .{ .document_start = .{} }, + .{ .mapping_start = .{ .style = .flow } }, + .{ .scalar = .{ .value = "items" } }, + .{ .sequence_start = .{ .style = .flow } }, + .{ .scalar = .{ .value = "one" } }, + .{ .mapping_start = .{ .style = .flow } }, + .{ .scalar = .{ .value = "key" } }, + .{ .scalar = .{ .value = "value" } }, + .mapping_end, + .sequence_end, + .mapping_end, + .{ .document_end = .{} }, + .stream_end, + }; + + const counts = try buildDirectCollectionChildCounts(std.testing.allocator, &events); + defer std.testing.allocator.free(counts); + + try std.testing.expectEqual(@as(usize, 2), counts[0]); + try std.testing.expectEqual(@as(usize, 2), counts[1]); + try std.testing.expectEqual(@as(usize, 2), counts[2]); +} diff --git a/src/loader/loader.zig b/src/loader/loader.zig index 32cbeb2b..3ee520da 100644 --- a/src/loader/loader.zig +++ b/src/loader/loader.zig @@ -26,10 +26,6 @@ const UnknownTagBehavior = options.UnknownTagBehavior; pub const LoadFailure = failure.LoadFailure; -/// Loads YAML parser events into arena-owned public value-model document roots. -/// -/// The caller owns returned slice and all returned node data through -/// `allocator`. In normal public use this allocator is a document arena. 
pub fn loadStreamFromEvents( allocator: std.mem.Allocator, events: []const Event, @@ -41,6 +37,7 @@ pub fn loadStreamFromEvents( max_document_count: ?usize, ) Error![]const *const Node { return loadStreamFromEventsWithFailure( + allocator, allocator, events, selected_schema, @@ -54,10 +51,9 @@ pub fn loadStreamFromEvents( ); } -/// Loads YAML parser events and records loader-stage failure categories when a -/// diagnostic-aware public API needs to describe non-parser failures. pub fn loadStreamFromEventsWithFailure( allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, events: []const Event, selected_schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, @@ -77,6 +73,7 @@ pub fn loadStreamFromEventsWithFailure( if (!summary.has_aliases) { return direct.loadStreamFromEventsWithStringPolicy( allocator, + temporary_allocator, events, selected_schema, duplicate_key_behavior, @@ -89,6 +86,7 @@ pub fn loadStreamFromEventsWithFailure( return loadStreamFromEventsComposedUnchecked( allocator, + temporary_allocator, events, selected_schema, duplicate_key_behavior, @@ -119,6 +117,7 @@ pub fn loadStreamFromEventsComposed( }, load_failure); return loadStreamFromEventsComposedUnchecked( + allocator, allocator, events, selected_schema, @@ -133,6 +132,7 @@ pub fn loadStreamFromEventsComposed( fn loadStreamFromEventsComposedUnchecked( allocator: std.mem.Allocator, + temporary_allocator: std.mem.Allocator, events: []const Event, selected_schema: Schema, duplicate_key_behavior: DuplicateKeyBehavior, @@ -157,6 +157,7 @@ fn loadStreamFromEventsComposedUnchecked( return construct.constructStreamWithFailure( allocator, + temporary_allocator, graph_documents, selected_schema, duplicate_key_behavior, diff --git a/tests/structure/unit_size_test.zig b/tests/structure/unit_size_test.zig index 5221f79d..1c17a191 100644 --- a/tests/structure/unit_size_test.zig +++ b/tests/structure/unit_size_test.zig @@ -39,6 +39,7 @@ test "structure: unit regression tests stay split by 
feature" { .{ .path = "tests/unit/api/root_api_test.zig", .max_lines = 600 }, .{ .path = "tests/unit/api/support.zig", .max_lines = 600 }, .{ .path = "tests/unit/api/load_string_ownership_test.zig", .max_lines = 180 }, + .{ .path = "tests/unit/api/load_memory_test.zig", .max_lines = 160 }, .{ .path = "tests/unit/api/load_test.zig", .max_lines = 900 }, .{ .path = "tests/unit/api/tags_test.zig", .max_lines = 2000 }, .{ .path = "tests/unit/api/emit_test.zig", .max_lines = 2700 }, diff --git a/tests/unit/api/load_memory_test.zig b/tests/unit/api/load_memory_test.zig new file mode 100644 index 00000000..3fd01ffa --- /dev/null +++ b/tests/unit/api/load_memory_test.zig @@ -0,0 +1,107 @@ +//! Purpose: Verify public load temporary-storage memory boundaries. +//! Owns: Public load/loadStream allocation lifetime regression tests. +//! Does not own: General load behavior, string decoding, parser events, or diagnostics. +//! Depends on: tests/unit/api/support.zig. +//! Tested by: zig build test-unit. + +const support = @import("support.zig"); + +const std = support.std; +const expectScalarString = support.expectScalarString; +const loadStreamWithOptions = support.loadStreamWithOptions; + +test "loadStreamWithOptions fast path releases parser storage before returning" { + var counter: LiveTrackingAllocator = .{ .child = std.testing.allocator }; + const input = try std.testing.allocator.dupe(u8, + \\!<tag:example.com,2000:map> &map + \\plain: plain value + \\single: 'single ''quoted'' value' + \\double: "decoded\nvalue" + \\tagged: !<tag:example.com,2000:tag> 
&tagged tagged value + \\ + ); + + var stream = try loadStreamWithOptions(counter.allocator(), input, .{}); + defer stream.deinit(); + + const retained_after_load = counter.live_bytes; + const peak_during_load = counter.peak_live_bytes; + + @memset(input, 0xa5); + std.testing.allocator.free(input); + + try std.testing.expect(peak_during_load > retained_after_load); + try std.testing.expectEqual(@as(usize, 1), stream.documents.len); + const root = stream.documents[0]; + try std.testing.expect(root.* == .mapping); + try std.testing.expectEqualStrings("map", root.mapping.anchor.?); + try std.testing.expectEqualStrings("tag:example.com,2000:map", root.mapping.tag.?); + try std.testing.expectEqual(@as(usize, 4), root.mapping.pairs.len); + + try expectScalarString(root.mapping.pairs[0].value, "plain value"); + try expectScalarString(root.mapping.pairs[1].value, "single 'quoted' value"); + try expectScalarString(root.mapping.pairs[2].value, "decoded\nvalue"); + + const tagged = root.mapping.pairs[3].value; + try expectScalarString(tagged, "tagged value"); + try std.testing.expectEqualStrings("tagged", tagged.scalar.anchor.?); + try std.testing.expectEqualStrings("tag:example.com,2000:tag", tagged.scalar.tag.?); +} + +const LiveTrackingAllocator = struct { + child: std.mem.Allocator, + live_bytes: usize = 0, + peak_live_bytes: usize = 0, + + fn allocator(self: *LiveTrackingAllocator) std.mem.Allocator { + return .{ + .ptr = self, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = free, + }, + }; + } + + fn alloc(context: *anyopaque, len: usize, alignment: std.mem.Alignment, ret_addr: usize) ?[*]u8 { + const self: *LiveTrackingAllocator = @ptrCast(@alignCast(context)); + const result = self.child.rawAlloc(len, alignment, ret_addr) orelse return null; + self.addLiveBytes(len); + return result; + } + + fn resize(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, new_len: usize, ret_addr: usize) bool { + const self: 
*LiveTrackingAllocator = @ptrCast(@alignCast(context)); + if (!self.child.rawResize(memory, alignment, new_len, ret_addr)) return false; + self.countResize(memory.len, new_len); + return true; + } + + fn remap(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, new_len: usize, ret_addr: usize) ?[*]u8 { + const self: *LiveTrackingAllocator = @ptrCast(@alignCast(context)); + const result = self.child.rawRemap(memory, alignment, new_len, ret_addr) orelse return null; + self.countResize(memory.len, new_len); + return result; + } + + fn free(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { + const self: *LiveTrackingAllocator = @ptrCast(@alignCast(context)); + self.live_bytes -= memory.len; + self.child.rawFree(memory, alignment, ret_addr); + } + + fn countResize(self: *LiveTrackingAllocator, old_len: usize, new_len: usize) void { + if (new_len > old_len) { + self.addLiveBytes(new_len - old_len); + } else { + self.live_bytes -= old_len - new_len; + } + } + + fn addLiveBytes(self: *LiveTrackingAllocator, len: usize) void { + self.live_bytes += len; + self.peak_live_bytes = @max(self.peak_live_bytes, self.live_bytes); + } +}; diff --git a/tests/unit/api/root_api_test.zig b/tests/unit/api/root_api_test.zig index a83a60d8..912ef915 100644 --- a/tests/unit/api/root_api_test.zig +++ b/tests/unit/api/root_api_test.zig @@ -7,6 +7,7 @@ comptime { _ = @import("parse_test.zig"); _ = @import("load_test.zig"); + _ = @import("load_memory_test.zig"); _ = @import("tags_test.zig"); _ = @import("emit_test.zig"); _ = @import("diagnostics_test.zig"); diff --git a/tests/unit/loader/direct_test.zig b/tests/unit/loader/direct_test.zig index 75467ae1..6cd261cc 100644 --- a/tests/unit/loader/direct_test.zig +++ b/tests/unit/loader/direct_test.zig @@ -78,6 +78,50 @@ test "loader direct: load options match composed path" { try expectDirectMatchesComposed(&unknown_tag_events, .core, .reject, .preserve, null); } +test "loader direct: duplicate 
key validation uses temporary allocator" { + const pair_count = 40; + var key_strings: [pair_count][]u8 = undefined; + for (&key_strings, 0..) |*key, index| { + key.* = try std.fmt.allocPrint(std.testing.allocator, "key-{d}", .{index}); + } + defer for (key_strings) |key| std.testing.allocator.free(key); + + var events: [pair_count * 2 + 6]Event = undefined; + events[0] = .stream_start; + events[1] = .{ .document_start = .{} }; + events[2] = .{ .mapping_start = .{ .style = .block } }; + for (key_strings, 0..) |key, index| { + events[3 + index * 2] = .{ .scalar = .{ .value = key } }; + events[4 + index * 2] = .{ .scalar = .{ .value = "value" } }; + } + events[events.len - 3] = .mapping_end; + events[events.len - 2] = .{ .document_end = .{} }; + events[events.len - 1] = .stream_end; + + var output_arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer output_arena.deinit(); + var temporary_counter: LiveCountingAllocator = .{ .child = std.testing.allocator }; + + const documents = try loader.loadStreamFromEventsWithFailure( + output_arena.allocator(), + temporary_counter.allocator(), + &events, + .core, + .reject, + .preserve, + null, + null, + null, + null, + true, + ); + + try std.testing.expectEqual(@as(usize, 1), documents.len); + try std.testing.expectEqual(@as(usize, pair_count), documents[0].mapping.pairs.len); + try std.testing.expect(temporary_counter.allocations > 0); + try std.testing.expectEqual(@as(usize, 0), temporary_counter.live_bytes); +} + test "loader: alias events use composed fallback" { const events = [_]Event{ .stream_start, @@ -396,3 +440,57 @@ fn expectOptionalStringEqual(expected: ?[]const u8, actual: ?[]const u8) !void { try std.testing.expect(actual == null); } } + +const LiveCountingAllocator = struct { + child: std.mem.Allocator, + allocations: usize = 0, + live_bytes: usize = 0, + + fn allocator(self: *LiveCountingAllocator) std.mem.Allocator { + return .{ + .ptr = self, + .vtable = &.{ + .alloc = alloc, + .resize = resize, 
+ .remap = remap, + .free = free, + }, + }; + } + + fn alloc(context: *anyopaque, len: usize, alignment: std.mem.Alignment, ret_addr: usize) ?[*]u8 { + const self: *LiveCountingAllocator = @ptrCast(@alignCast(context)); + const result = self.child.rawAlloc(len, alignment, ret_addr) orelse return null; + self.allocations += 1; + self.live_bytes += len; + return result; + } + + fn resize(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, new_len: usize, ret_addr: usize) bool { + const self: *LiveCountingAllocator = @ptrCast(@alignCast(context)); + if (!self.child.rawResize(memory, alignment, new_len, ret_addr)) return false; + self.countResize(memory.len, new_len); + return true; + } + + fn remap(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, new_len: usize, ret_addr: usize) ?[*]u8 { + const self: *LiveCountingAllocator = @ptrCast(@alignCast(context)); + const result = self.child.rawRemap(memory, alignment, new_len, ret_addr) orelse return null; + self.countResize(memory.len, new_len); + return result; + } + + fn free(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { + const self: *LiveCountingAllocator = @ptrCast(@alignCast(context)); + self.live_bytes -= memory.len; + self.child.rawFree(memory, alignment, ret_addr); + } + + fn countResize(self: *LiveCountingAllocator, old_len: usize, new_len: usize) void { + if (new_len > old_len) { + self.live_bytes += new_len - old_len; + } else { + self.live_bytes -= old_len - new_len; + } + } +}; diff --git a/tools/benchmark.zig b/tools/benchmark.zig index abc719a0..3bf9a361 100644 --- a/tools/benchmark.zig +++ b/tools/benchmark.zig @@ -39,6 +39,9 @@ const AllocationPath = enum { parser_events, composer_events, loader_events, + load_default, + load_failsafe_allow, + load_stream_default, fn label(self: AllocationPath) []const u8 { return switch (self) { @@ -46,6 +49,9 @@ const AllocationPath = enum { .parser_events => "parser-events", .composer_events => 
"composer-events", .loader_events => "loader-events", + .load_default => "load-default", + .load_failsafe_allow => "load-failsafe-allow-duplicates", + .load_stream_default => "load-stream-default", }; } }; @@ -55,9 +61,16 @@ const AllocationMetrics = struct { frees: usize, allocated_bytes: usize, freed_bytes: usize, + retained_bytes: usize, + peak_live_bytes: usize, checksum: u64, }; +const AllocationRun = struct { + checksum: u64, + retained_bytes: usize, +}; + const conformance_subset = \\--- \\foo: @@ -101,6 +114,12 @@ pub fn main(init: std.process.Init) !void { try reportAllocations(allocator, stdout, fixture, .parser_events); try reportAllocations(allocator, stdout, fixture, .composer_events); try reportAllocations(allocator, stdout, fixture, .loader_events); + if (fixture.single_document) { + try reportAllocations(allocator, stdout, fixture, .load_default); + try reportAllocations(allocator, stdout, fixture, .load_failsafe_allow); + } else { + try reportAllocations(allocator, stdout, fixture, .load_stream_default); + } } try stdout.flush(); @@ -223,6 +242,22 @@ test "benchmark fixtures include comparison-derived hot paths" { try expectFixture(fixtures, "scalar-heavy"); } +test "public load allocation metrics report retained and peak live bytes" { + const metrics = try measureAllocations(std.testing.allocator, + \\name: yaml + \\items: + \\ - one + \\ - two + \\ + , .load_default); + + try std.testing.expect(metrics.allocations > 0); + try std.testing.expect(metrics.allocated_bytes >= metrics.retained_bytes); + try std.testing.expect(metrics.peak_live_bytes >= metrics.retained_bytes); + try std.testing.expect(metrics.retained_bytes > 0); + try std.testing.expectEqual(metrics.allocated_bytes, metrics.freed_bytes); +} + fn expectFixture(fixtures: []const Fixture, name: []const u8) !void { for (fixtures) |fixture| { if (std.mem.eql(u8, fixture.name, name)) return; @@ -352,7 +387,7 @@ fn reportAllocations( ) !void { const metrics = try 
measureAllocations(allocator, fixture.input, allocation_path); try stdout.print( - "allocs input={s} bytes={} api={s} allocations={} frees={} allocated_bytes={} freed_bytes={} checksum={}\n", + "allocs input={s} bytes={} api={s} allocations={} frees={} allocated_bytes={} freed_bytes={} retained_bytes={} peak_live_bytes={} checksum={}\n", .{ fixture.name, fixture.input.len, @@ -361,6 +396,8 @@ fn reportAllocations( metrics.frees, metrics.allocated_bytes, metrics.freed_bytes, + metrics.retained_bytes, + metrics.peak_live_bytes, metrics.checksum, }, ); @@ -395,11 +432,14 @@ fn measureAllocations(allocator: std.mem.Allocator, input: []const u8, allocatio var counter: CountingAllocator = .{ .child = allocator }; const counted_allocator = counter.allocator(); - const checksum = switch (allocation_path) { - .scanner => checksumScanner(try yaml_internal.scanner.scan(counted_allocator, input)), - .parser_events => checksumParserEvents(try parseEvents(counted_allocator, input)), - .composer_events => try measureComposerAllocations(allocator, counted_allocator, input), - .loader_events => try measureLoaderAllocations(allocator, counted_allocator, input), + const run: AllocationRun = switch (allocation_path) { + .scanner => .{ .checksum = checksumScanner(try yaml_internal.scanner.scan(counted_allocator, input)), .retained_bytes = counter.live_bytes }, + .parser_events => .{ .checksum = checksumParserEvents(try parseEvents(counted_allocator, input)), .retained_bytes = counter.live_bytes }, + .composer_events => .{ .checksum = try measureComposerAllocations(allocator, counted_allocator, input), .retained_bytes = counter.live_bytes }, + .loader_events => .{ .checksum = try measureLoaderAllocations(allocator, counted_allocator, input), .retained_bytes = counter.live_bytes }, + .load_default => try measurePublicLoadAllocations(&counter, input, true, .core, .reject), + .load_failsafe_allow => try measurePublicLoadAllocations(&counter, input, true, .failsafe, .allow), + 
.load_stream_default => try measurePublicLoadAllocations(&counter, input, false, .core, .reject), }; return .{ @@ -407,7 +447,9 @@ fn measureAllocations(allocator: std.mem.Allocator, input: []const u8, allocatio .frees = counter.frees, .allocated_bytes = counter.allocated_bytes, .freed_bytes = counter.freed_bytes, - .checksum = checksum, + .retained_bytes = run.retained_bytes, + .peak_live_bytes = counter.peak_live_bytes, + .checksum = run.checksum, }; } @@ -437,6 +479,44 @@ fn measureLoaderAllocations(allocator: std.mem.Allocator, counted_allocator: std return checksum; } +fn measurePublicLoadAllocations( + counter: *CountingAllocator, + input: []const u8, + require_single_document: bool, + selected_schema: anytype, + duplicate_key_behavior: anytype, +) !AllocationRun { + const counted_allocator = counter.allocator(); + + if (require_single_document) { + var document = try yaml_internal.load_api.loadWithOptions(counted_allocator, input, .{ + .schema = selected_schema, + .duplicate_key_behavior = duplicate_key_behavior, + }); + defer document.deinit(); + + return .{ + .checksum = 1 +% checksumNode(document.root), + .retained_bytes = counter.live_bytes, + }; + } + + var stream = try yaml_internal.load_api.loadStreamWithOptions(counted_allocator, input, .{ + .schema = selected_schema, + .duplicate_key_behavior = duplicate_key_behavior, + }); + defer stream.deinit(); + + var checksum: u64 = stream.documents.len; + for (stream.documents) |document| { + checksum +%= checksumNode(document); + } + return .{ + .checksum = checksum, + .retained_bytes = counter.live_bytes, + }; +} + fn parseEvents(allocator: std.mem.Allocator, input: []const u8) !yaml_internal.parser.EventStream { var token_stream = try yaml_internal.scanner.scan(allocator, input); defer token_stream.deinit(); @@ -628,6 +708,8 @@ const CountingAllocator = struct { frees: usize = 0, allocated_bytes: usize = 0, freed_bytes: usize = 0, + live_bytes: usize = 0, + peak_live_bytes: usize = 0, fn allocator(self: 
*CountingAllocator) std.mem.Allocator { return .{ @@ -646,6 +728,7 @@ const CountingAllocator = struct { const result = self.child.rawAlloc(len, alignment, ret_addr) orelse return null; self.allocations += 1; self.allocated_bytes += len; + self.addLiveBytes(len); return result; } @@ -667,14 +750,24 @@ const CountingAllocator = struct { const self: *CountingAllocator = @ptrCast(@alignCast(context)); self.frees += 1; self.freed_bytes += memory.len; + self.live_bytes -= memory.len; self.child.rawFree(memory, alignment, ret_addr); } fn countResize(self: *CountingAllocator, old_len: usize, new_len: usize) void { if (new_len > old_len) { - self.allocated_bytes += new_len - old_len; + const delta = new_len - old_len; + self.allocated_bytes += delta; + self.addLiveBytes(delta); } else { - self.freed_bytes += old_len - new_len; + const delta = old_len - new_len; + self.freed_bytes += delta; + self.live_bytes -= delta; } } + + fn addLiveBytes(self: *CountingAllocator, len: usize) void { + self.live_bytes += len; + self.peak_live_bytes = @max(self.peak_live_bytes, self.live_bytes); + } };